diff --git a/22-05-21-Decision_Tree/Decision_Tree_Solution_2.ipynb b/22-05-21-Decision_Tree/Decision_Tree_Solution_2.ipynb new file mode 100644 index 0000000..469e932 --- /dev/null +++ b/22-05-21-Decision_Tree/Decision_Tree_Solution_2.ipynb @@ -0,0 +1,7666 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "e033a865-17a2-466a-8cde-a39cc6752deb", + "_uuid": "c9e2806b423b6385a8d876545ab668324b6bb451", + "id": "EbKrWuThjSNy" + }, + "source": [ + "# Bank Marketing Data - A Decision Tree Approach" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "459ad1ed-0e10-4dd1-a4f1-893ba5368175", + "_uuid": "08bb7d7c3677ca15a39a3569cef5d2071e9b015e", + "id": "AOo0cVvFjSN1" + }, + "source": [ + "## Aim:\n", + "The aim of this attempt is to predict if the client will subscribe (yes/no) to a term deposit, by building a classification model using Decision Tree.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "9e92f1a3-3e81-4ec6-a567-890827b0555c", + "_uuid": "e5338e8c3fa6dc8f410d8b868aa78cb54621780b", + "collapsed": true, + "id": "RRTIuD1KjSN2" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import plotly.express as px\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn import tree\n", + "from sklearn import metrics" + ] + }, + { + "cell_type": "code", + "source": [ + "" + ], + "metadata": { + "id": "OVLyUP7vey5e" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Link to the data set: https://raw.githubusercontent.com/Ramanand-Yadav/DataSet/main/bank.csv" + ], + "metadata": { + "id": "f88mo_5Bez6C" + } + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "_cell_guid": "52a87df3-04c2-4515-994a-9a3d0b85d3c3", + "_uuid": "9700cff3c818070f8f202bc964e2a5ffc8c72aa5", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "HX-ZHZpkjSN4", + "outputId": "cd4f8c6a-06e4-4420-ea9f-5f8fbaa0d116" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agejobmaritaleducationdefaultbalancehousingloancontactdaymonthdurationcampaignpdayspreviouspoutcomedeposit
059admin.marriedsecondaryno2343yesnounknown5may10421-10unknownyes
156admin.marriedsecondaryno45nonounknown5may14671-10unknownyes
241technicianmarriedsecondaryno1270yesnounknown5may13891-10unknownyes
355servicesmarriedsecondaryno2476yesnounknown5may5791-10unknownyes
454admin.marriedtertiaryno184nonounknown5may6732-10unknownyes
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " age job marital education ... pdays previous poutcome deposit\n", + "0 59 admin. married secondary ... -1 0 unknown yes\n", + "1 56 admin. married secondary ... -1 0 unknown yes\n", + "2 41 technician married secondary ... -1 0 unknown yes\n", + "3 55 services married secondary ... -1 0 unknown yes\n", + "4 54 admin. married tertiary ... -1 0 unknown yes\n", + "\n", + "[5 rows x 17 columns]" + ] + }, + "metadata": {}, + "execution_count": 115 + } + ], + "source": [ + "# Load data file\n", + "bank=pd.read_csv('https://raw.githubusercontent.com/Ramanand-Yadav/DataSet/main/bank.csv')\n", + "bank.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "a727fd41-6e6e-424f-b65f-18c723b289a9", + "_uuid": "de29791be07bcac0927cccfb6e858044d1b882ca", + "id": "Pzth4W1YjSN5" + }, + "source": [ + "## Summary of data\n", + "\n", + "### Categorical Variables :\n", + "**[1] job :** admin., technician, services, management, retired, blue-collar, unemployed, entrepreneur,\n", + " housemaid, unknown, self-employed, student\n", + "
**[2] marital :** married, single, divorced\n", + "
**[3] education:** secondary, tertiary, primary, unknown\n", + "
**[4] default :** yes, no\n", + "
**[5] housing :** yes, no\n", + "
**[6] loan :** yes, no \n", + "
**[7] deposit :** yes, no **(Dependent Variable)**\n", + "
**[8] contact :** unknown, cellular, telephone\n", + "
**[9] month :** jan, feb, mar, apr, may, jun, jul, aug, sep, oct, nov, dec\n", + "
**[10] poutcome:** unknown, other, failure, success\n", + "\n", + "### Numerical Variables:\n", + "**[1] age \n", + "
[2] balance\n", + "
[3] day\n", + "
[4] duration\n", + "
[5] campaign\n", + "
[6] pdays\n", + "
[7] previous**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "142f9bce-cf27-45c6-a776-0ad6e8c660c9", + "_uuid": "f928d98a7f2c9fda54ee20d38b3d03101339e451", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "iyvtY2nEjSN6", + "outputId": "81c366db-d02c-4cb6-d1f1-b57947125bf9" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "age 0\n", + "job 0\n", + "marital 0\n", + "education 0\n", + "default 0\n", + "balance 0\n", + "housing 0\n", + "loan 0\n", + "contact 0\n", + "day 0\n", + "month 0\n", + "duration 0\n", + "campaign 0\n", + "pdays 0\n", + "previous 0\n", + "poutcome 0\n", + "deposit 0\n", + "dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 116 + } + ], + "source": [ + "# Check if the data set contains any null values - Nothing found!\n", + "bank[bank.isnull().any(axis=1)].count()" + ] + }, + { + "cell_type": "code", + "source": [ + "bank.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "FjwUYyOTvzLz", + "outputId": "3f3b6bfe-5a53-4e96-d395-fb742f3759e5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(11162, 17)" + ] + }, + "metadata": {}, + "execution_count": 117 + } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "a484827c-4ca4-42ce-895a-8228f4b86c31", + "_uuid": "8d03c75ae5115ebc97a26ac95408240177c7fc3f", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/", + "height": 300 + }, + "id": "65JuHMiUjSN6", + "outputId": "612d2540-25d2-479b-96be-5922f5e979b5" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agebalancedaydurationcampaignpdaysprevious
count11162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.000000
mean41.2319481528.53852415.658036371.9938182.50842151.3304070.832557
std11.9133693225.4133268.420740347.1283862.722077108.7582822.292007
min18.000000-6847.0000001.0000002.0000001.000000-1.0000000.000000
25%32.000000122.0000008.000000138.0000001.000000-1.0000000.000000
50%39.000000550.00000015.000000255.0000002.000000-1.0000000.000000
75%49.0000001708.00000022.000000496.0000003.00000020.7500001.000000
max95.00000081204.00000031.0000003881.00000063.000000854.00000058.000000
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " age balance ... pdays previous\n", + "count 11162.000000 11162.000000 ... 11162.000000 11162.000000\n", + "mean 41.231948 1528.538524 ... 51.330407 0.832557\n", + "std 11.913369 3225.413326 ... 108.758282 2.292007\n", + "min 18.000000 -6847.000000 ... -1.000000 0.000000\n", + "25% 32.000000 122.000000 ... -1.000000 0.000000\n", + "50% 39.000000 550.000000 ... -1.000000 0.000000\n", + "75% 49.000000 1708.000000 ... 20.750000 1.000000\n", + "max 95.000000 81204.000000 ... 854.000000 58.000000\n", + "\n", + "[8 rows x 7 columns]" + ] + }, + "metadata": {}, + "execution_count": 118 + } + ], + "source": [ + "bank.describe()" + ] + }, + { + "cell_type": "markdown", + "source": [ + "###Numerical Data" + ], + "metadata": { + "id": "tMHl09z3qdPL" + } + }, + { + "cell_type": "code", + "source": [ + "numCol = ['age','balance','day','duration','campaign','pdays','previous']\n", + "fig, axe = plt.subplots(7, 2, figsize=(18, 18))\n", + "for index, col in enumerate(numCol):\n", + " # print(index, type(col))\n", + " sns.boxplot(ax=axe[index, 0], x=bank[col])\n", + " sns.distplot(ax=axe[index, 1], x=bank[col])\n", + " axe[index, 0].set_title(col)\n", + " axe[index, 1].set_title(col)\n", + " " + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "ZtUPYSCq6ztk", + "outputId": "47f0806d-2f87-4389-825e-c123640d36be" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2619: FutureWarning:\n", + "\n", + "`distplot` is a deprecated function and will be removed in a future version. 
Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + "\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2619: FutureWarning:\n", + "\n", + "`distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + "\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2619: FutureWarning:\n", + "\n", + "`distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + "\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2619: FutureWarning:\n", + "\n", + "`distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + "\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2619: FutureWarning:\n", + "\n", + "`distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + "\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2619: FutureWarning:\n", + "\n", + "`distplot` is a deprecated function and will be removed in a future version. 
Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + "\n", + "/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2619: FutureWarning:\n", + "\n", + "`distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "f8e3b055-7d8e-4723-9e57-4a1e454ef194", + "_uuid": "d8cf9f54203152a76a7e8a03c3003644eb59525f", + "id": "wzMnrcIxjSN8" + }, + "source": [ + "### categorical data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "353340bc-f258-4ca4-96ae-ce3dd6da9e4f", + "_uuid": "fa62612c19f6f510de6155f1f279d6aa812b9d22", + "collapsed": true, + "id": "zkei7Ke1jSN9" + }, + "outputs": [], + "source": [ + "# Make a copy for parsing\n", + "bank_data = bank.copy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "2aaa6278-7388-461b-b0a6-7d0965872ca2", + "_uuid": "753b4f0d672266ac598f1afe35952245951ac8b1", + "id": "dHfxJN1YjSN9" + }, + "source": [ + "#### job " + ] + }, + { + "cell_type": "code", + "source": [ + "px.histogram(bank_data['job'])" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 542 + }, + "id": "jS9J9AbCFziZ", + "outputId": "55c83307-5d7a-4fe9-f376-1c4ce8f1793a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "29ebb749-1a14-4ea7-85bb-e148945ed328", + "_uuid": "757d1a8bfab2c64c59bcd78a12eb43721e198758", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "u_KCWvYGjSN9", + "outputId": "21d37870-e34c-4d81-ee61-3e073660f07d" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "management : 1301\n", + "blue-collar : 708\n", + "technician : 840\n", + "admin. : 631\n", + "services : 369\n", + "retired : 516\n", + "self-employed : 187\n", + "student : 269\n", + "unemployed : 202\n", + "entrepreneur : 123\n", + "housemaid : 109\n", + "unknown : 34\n" + ] + } + ], + "source": [ + "# Explore People who made a deposit Vs Job category\n", + "jobs = ['management','blue-collar','technician','admin.','services','retired','self-employed','student',\\\n", + " 'unemployed','entrepreneur','housemaid','unknown']\n", + "\n", + "for j in jobs:\n", + " print(\"{:} : {:}\". format(j, len(bank_data[(bank_data.deposit == \"yes\") & (bank_data.job ==j)])))" + ] + }, + { + "cell_type": "code", + "source": [ + "px.histogram(bank_data, x='job', color = 'deposit')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 542 + }, + "id": "HaUUtoLdHAaR", + "outputId": "608a7bda-164e-421d-904c-c3eea34f4b18" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "cf54d19e-ab5b-4b16-8c17-a1d441675e4e", + "_uuid": "6c5d8eb98fc23a74d8e235d1816758e760ae7222", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WyOwz-VvjSN-", + "outputId": "ea9207fb-0b49-4612-b6f3-89c265a84822" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "management 2566\n", + "blue-collar 1944\n", + "technician 1823\n", + "admin. 1334\n", + "services 923\n", + "retired 778\n", + "self-employed 405\n", + "student 360\n", + "unemployed 357\n", + "entrepreneur 328\n", + "housemaid 274\n", + "unknown 70\n", + "Name: job, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 141 + } + ], + "source": [ + "# Different types of job categories and their counts\n", + "bank_data.job.value_counts()" + ] + }, + { + "cell_type": "code", + "source": [ + "" + ], + "metadata": { + "id": "htUuo6d9D3ta" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "fdcd5741-12b8-4b48-99b6-bd6e834f0da9", + "_uuid": "58840f83e0b4fb1ea28c8aaebc109359f21e86e1", + "collapsed": true, + "id": "kSNoXW42jSN-" + }, + "outputs": [], + "source": [ + "# Combine similar jobs into categiroes\n", + "bank_data['job'] = bank_data['job'].replace(['management', 'admin.'], 'white-collar')\n", + "bank_data['job'] = bank_data['job'].replace(['services','housemaid'], 'pink-collar')\n", + "bank_data['job'] = bank_data['job'].replace(['retired', 'student', 'unemployed', 'unknown'], 'other')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "6c44a280-708c-472e-8a9d-454130ebd29f", + "_uuid": "60184753637b373ef973a5c38cd15bf51a1e58ac", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kFNhQiQ5jSN_", + "outputId": 
"638aae7f-0c6a-4777-fa48-53cdd5fb1904" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "white-collar 3900\n", + "blue-collar 1944\n", + "technician 1823\n", + "other 1565\n", + "pink-collar 1197\n", + "self-employed 405\n", + "entrepreneur 328\n", + "Name: job, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 143 + } + ], + "source": [ + "# New value counts\n", + "bank_data.job.value_counts()" + ] + }, + { + "cell_type": "code", + "source": [ + "px.histogram(bank_data, x='job')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 542 + }, + "id": "F30bzEG8HZ3F", + "outputId": "fced5260-7a5e-40f9-9341-baf4ab27b9e3" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "px.histogram(bank_data, x='job', color = 'deposit')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 542 + }, + "id": "nmv81FWzHm3U", + "outputId": "e31676f0-fc0b-4ab4-969a-dd9d09377618" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "c446c4b5-d0ce-48e1-8ceb-6431e49ad97e", + "_uuid": "36fcf9b5b0dc9374f7540f918be453ece6eec86e", + "id": "7tdtMFRCjSN_" + }, + "source": [ + "#### poutcome" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "6516214b-7dab-4c4b-9abe-dd53db689a79", + "_uuid": "791b50d069b7a06e57095974f5c9c0e0d0c9b55d", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6Jg8ioI6jSN_", + "outputId": "7eb6e3d1-d1a2-40c1-b82e-3b11e943de93" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "unknown 8326\n", + "failure 1228\n", + "success 1071\n", + "other 537\n", + "Name: poutcome, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 146 + } + ], + "source": [ + "bank_data.poutcome.value_counts()" + ] + }, + { + "cell_type": "code", + "source": [ + "sns.countplot(bank_data['poutcome'])" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 386 + }, + "id": "A9AjqcIyH4hJ", + "outputId": "0113d73a-9d4f-41b4-c5b0-f341312450b3" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning:\n", + "\n", + "Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n", + "\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 147 + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "b827ea76-4adb-4472-9def-ebd268d5586f", + "_uuid": "dd9bc0417c255963d08c7eb721dde66b32d406dc", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "T4Sbh6U4jSOA", + "outputId": "841a9bdc-aca7-4be0-a226-bb3184beee75" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "unknown 8863\n", + "failure 1228\n", + "success 1071\n", + "Name: poutcome, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 148 + } + ], + "source": [ + "# Combine 'unknown' and 'other' as 'other' isn't really match with either 'success' or 'failure'\n", + "bank_data['poutcome'] = bank_data['poutcome'].replace(['other'] , 'unknown')\n", + "bank_data.poutcome.value_counts()" + ] + }, + { + "cell_type": "code", + "source": [ + "sns.countplot(bank_data['poutcome'])" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 386 + }, + "id": "0q5EJsgVKW-S", + "outputId": "cc5d9853-ad11-4a45-f540-0ef60d4032ae" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning:\n", + "\n", + "Pass the following variable as a keyword arg: x. 
From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n", + "\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 149 + }, + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEGCAYAAACUzrmNAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAATvElEQVR4nO3de7xlZX3f8c9XRiSSCANMqQ7YIZFoUWPECaJEq0JwhNTBS5RcdEQaNCVEk9p46asl5dLWGEPBRC0KAsorSIkGYmwJBVGKCgz3y4hMAYUp4MQZUbFiBn79Yz9HtsM58+wZzz6XOZ/363Vee61nPWvt3zl7znzPetbaz05VIUnSljxhtguQJM19hoUkqcuwkCR1GRaSpC7DQpLUtWi2CxiHPfbYo5YtWzbbZUjSvHLttdf+Q1UtmWzbdhkWy5YtY/Xq1bNdhiTNK0m+MdU2h6EkSV2GhSSpy7CQJHUZFpKkLsNCktRlWEiSugwLSVKXYSFJ6jIsJEld2+U7uLfGC/7tObNdwoJw7QfePNslSPopeGYhSeoyLCRJXYaFJKnLsJAkdRkWkqQuw0KS1GVYSJK6DAtJUpdhIUnqMiwkSV2GhSSpy7CQJHUZFpKkLsNCktRlWEiSugwLSVLXWMMiyR8muTXJLUn+KslOSfZJclWStUk+nWTH1vdJbX1t275s6Djvbe23J3nlOGuWJD3e2MIiyVLgD4DlVfUcYAfgSOD9wClV9QxgI3B02+VoYGNrP6X1I8l+bb9nAyuADyfZYVx1S5Ieb9zDUIuAn0myCHgycB/wCuCCtv1s4Ii2vLKt07YfnCSt/byqeriq7gLWAgeMuW5J0pCxhUVVrQP+DPgmg5B4ELgW+E5VbWrd7gWWtuWlwD1t302t/+7D7ZPs82NJjkmyOsnq9evXT/83JEkL2DiHoRYzOCvYB3gasDODYaSxqKrTq2p5VS1fsmTJuJ5GkhakcQ5DHQLcVVXrq+ofgc8ABwG7tmEpgL2AdW15HbA3QNu+C/Dt4fZJ9pEkzYBxhsU3gQOTPLldezgYuA34AvD61mcVcGFbvqit07ZfVlXV2o9sd0vtA+wLXD3GuiVJm1nU77JtquqqJBcA1wGbgOuB04G/A85LclJrO6PtcgbwySRrgQ0M7oCiqm5Ncj6DoNkEHFtVj4yrbknS440tLACq6njg+M2a72SSu5mq6ofAb0xxnJOBk6e9QEnSSHwHtySpy7CQJHUZFpKkLsNCktRlWEiSugwLSVKXYSFJ6jIsJEldhoUkqcuwkCR1GRaSpC7DQpLUZVhIkroMC0lSl2EhSeoyLCRJXYaFJKnLsJAkdRkWkqQuw0KS1GVYSJK6DAtJUpdhIUnqMiwkSV2GhSSpy7CQJHUZFpKkLsNCktRlWEiSugwLSVKXYSFJ6jIsJEldhoUkqcuwkCR1GRaSpC7DQpLUZVhIkrrGGhZJdk1yQZKvJVmT5EVJdktySZI72uPi1jdJTkuyNslNSfYfOs6q1v+OJKvGWbMk6fHGfWZxKvA/q+pZwPOANcB7gEural/g0rYO8Cpg3/Z1DPARgCS7AccDLwQOAI6fCBhJ0swYW1gk
2QV4KXAGQFX9qKq+A6wEzm7dzgaOaMsrgXNq4KvArkmeCrwSuKSqNlTVRuASYMW46pYkPd44zyz2AdYDn0hyfZKPJ9kZ2LOq7mt97gf2bMtLgXuG9r+3tU3VLkmaIeMMi0XA/sBHqur5wEM8NuQEQFUVUNPxZEmOSbI6yer169dPxyElSc04w+Je4N6quqqtX8AgPB5ow0u0x2+17euAvYf236u1TdX+E6rq9KpaXlXLlyxZMq3fiCQtdGMLi6q6H7gnyTNb08HAbcBFwMQdTauAC9vyRcCb211RBwIPtuGqi4FDkyxuF7YPbW2SpBmyaMzHPw44N8mOwJ3AUQwC6vwkRwPfAN7Q+n4eOAxYC/yg9aWqNiQ5Ebim9TuhqjaMuW5J0pCxhkVV3QAsn2TTwZP0LeDYKY5zJnDm9FYnSRqV7+CWJHUZFpKkLsNCktRlWEiSugwLSVKXYSFJ6jIsJEldhoUkqcuwkCR1GRaSpC7DQpLUZVhIkroMC0lSl2EhSeoyLCRJXSOFRZJLR2mTJG2ftvjhR0l2Ap4M7NE+0jRt01OApWOuTZI0R/Q+Ke9twDuBpwHX8lhYfBf4izHWJUmaQ7YYFlV1KnBqkuOq6kMzVJMkaY4Z6TO4q+pDSV4MLBvep6rOGVNdkqQ5ZKSwSPJJ4BeAG4BHWnMBhoUkLQAjhQWwHNivqmqcxUiS5qZR32dxC/BPx1mIJGnuGvXMYg/gtiRXAw9PNFbVq8dSlSRpThk1LP5knEVIkua2Ue+G+uK4C5EkzV2j3g31PQZ3PwHsCDwReKiqnjKuwiRJc8eoZxY/N7GcJMBK4MBxFSVJmlu2etbZGvgb4JVjqEeSNAeNOgz12qHVJzB438UPx1KRJGnOGfVuqH85tLwJuJvBUJQkaQEY9ZrFUeMuRJI0d4364Ud7Jflskm+1r79Oste4i5MkzQ2jXuD+BHARg8+1eBrwt61NkrQAjBoWS6rqE1W1qX2dBSwZY12SpDlk1LD4dpLfSbJD+/od4NvjLEySNHeMGhZvBd4A3A/cB7weeMuYapIkzTGj3jp7ArCqqjYCJNkN+DMGISJJ2s6NembxSxNBAVBVG4Dnj7JjG7a6Psnn2vo+Sa5KsjbJp5Ps2Nqf1NbXtu3Lho7x3tZ+exLfOS5JM2zUsHhCksUTK+3MYtSzkncAa4bW3w+cUlXPADYCR7f2o4GNrf2U1o8k+wFHAs8GVgAfTrLDiM8tSZoGo4bFB4GvJDkxyYnAl4E/7e3U3otxOPDxth7gFcAFrcvZwBFteWVbp20/eGjSwvOq6uGqugtYCxwwYt2SpGkwUlhU1TnAa4EH2tdrq+qTI+z6X4E/Bh5t67sD36mqTW39XmBpW14K3NOebxPwYOv/4/ZJ9vmxJMckWZ1k9fr160f5tiRJIxp1KImqug24bdT+SX4d+FZVXZvkZdtQ21apqtOB0wGWL19ene6SpK0wclhsg4OAVyc5DNgJeApwKrBrkkXt7GEvYF3rvw7YG7g3ySJgFwbv5ZhonzC8jyRpBmz151mMqqreW1V7VdUyBheoL6uq3wa+wOB9GgCrgAvb8kVtnbb9sqqq1n5ku1tqH2Bf4Opx1S1JerxxnllM5d3AeUlOAq4HzmjtZwCfTLIW2MAgYKiqW5Ocz2AIbBNwbFU9MvNlS9LCNSNhUVWXA5e35TuZ5G6mqvoh8BtT7H8ycPL4KpQkbcnYhqEkSdsPw0KS1GVYSJK6DAtJUpdhIUnqMiwkSV2GhSSpy7CQJHUZFpKkLsNCktRlWEiSugwLSVKXYSFJ6jIsJEldhoUkqcuwkCR1GRaSpC7DQpLUZVhIkroMC0lSl2EhSeoyLCRJXYaFJKnLsJAkdRkWkqQuw0KS1GVYSJK6DAtJUpdhIUnqMiwkSV2GhSSpy7CQJHUZFpKkLsNCktRlWEiSugwLSVKXYSFJ6jIsJEldYwuLJHsn+UKS25LcmuQdrX23JJckuaM9Lm7tSXJakrVJbkqy
/9CxVrX+dyRZNa6aJUmTG+eZxSbg31TVfsCBwLFJ9gPeA1xaVfsCl7Z1gFcB+7avY4CPwCBcgOOBFwIHAMdPBIwkaWaMLSyq6r6quq4tfw9YAywFVgJnt25nA0e05ZXAOTXwVWDXJE8FXglcUlUbqmojcAmwYlx1S5Ieb0auWSRZBjwfuArYs6rua5vuB/Zsy0uBe4Z2u7e1TdW++XMck2R1ktXr16+f1volaaEbe1gk+Vngr4F3VtV3h7dVVQE1Hc9TVadX1fKqWr5kyZLpOKQkqRlrWCR5IoOgOLeqPtOaH2jDS7THb7X2dcDeQ7vv1dqmapckzZBx3g0V4AxgTVX9+dCmi4CJO5pWARcOtb+53RV1IPBgG666GDg0yeJ2YfvQ1iZJmiGLxnjsg4A3ATcnuaG1vQ/4L8D5SY4GvgG8oW37PHAYsBb4AXAUQFVtSHIicE3rd0JVbRhj3ZKkzYwtLKrqfwOZYvPBk/Qv4NgpjnUmcOb0VSdJ2hq+g1uS1GVYSJK6DAtJUpdhIUnqMiwkSV2GhSSpy7CQJHUZFpKkLsNCktRlWEiSugwLSVKXYSFJ6hrnrLPS2H3zhOfOdgnbvaf/h5tnuwTNAZ5ZSJK6DAtJUpdhIUnq8pqFpFlz0IcOmu0StntXHnfltBzHMwtJUpdhIUnqMiwkSV2GhSSpy7CQJHUZFpKkLsNCktRlWEiSugwLSVKXYSFJ6jIsJEldhoUkqcuwkCR1GRaSpC7DQpLUZVhIkroMC0lSl2EhSeoyLCRJXYaFJKnLsJAkdc2bsEiyIsntSdYmec9s1yNJC8m8CIskOwB/CbwK2A/4zST7zW5VkrRwzIuwAA4A1lbVnVX1I+A8YOUs1yRJC0aqarZr6EryemBFVf2rtv4m4IVV9ftDfY4BjmmrzwRun/FCZ84ewD/MdhHaZr5+89f2/tr9s6paMtmGRTNdybhU1enA6bNdx0xIsrqqls92Hdo2vn7z10J+7ebLMNQ6YO+h9b1amyRpBsyXsLgG2DfJPkl2BI4ELprlmiRpwZgXw1BVtSnJ7wMXAzsAZ1bVrbNc1mxaEMNt2zFfv/lrwb528+ICtyRpds2XYShJ0iwyLCRJXYbFHJLkLUn+YrbrUF+SP0iyJsm5U2xfnuS0tuzrqnlvXlzgluagfw0cUlX3TraxqlYDq7flwEkWVdWmn6Y4abp5ZjFGSZYluWVo/V1J/iTJ5Unen+TqJF9P8pJJ9j08yVeS7JHkrCSnJflykjvbO9rJwAeS3JLk5iRvbO1/meTVbfmzSc5sy29NcnKra02SjyW5NcnfJ/mZmfmpzH9JPgr8PPA/kry7vU7Xt9fnma3Py5J8bpJ9z5p4/dr694f6X5HkIuC2JDu01/aaJDcledsMfXvbnSQ7J/m7JDe235U3Jrk7yR5t+/Ikl7fln03yifb7dFOS17X2FUmua8e4dOi4Z7bf4+uTrGztz25tN7Rj7DtZDbP049hmnlnMnkVVdUCSw4DjgUMmNiR5DfBHwGFVtTEJwFOBXwWexeA9JhcArwV+GXgeg2kIrknyJeAK4CWt39K2L63tvLa8L/CbVfW7Sc4HXgd8anzf7vajqt6eZAXwcuBHwAfb7d2HAP+Jwc9yW+wPPKeq7mrT1zxYVb+S5EnAlUn+vqrumpZvYmFZAfzfqjocIMkuwPun6PvvGfzcn9v6Lk6yBPgY8NL22uzW+v474LKqemuSXYGrk/wv4O3AqVV1bntf2A7AYZPUMK94ZjF7PtMerwWWDbW/Ang3cHhVbRxq/5uqerSqbgP2bG2/CvxVVT1SVQ8AXwR+hRYWbWbe24AHkjwVeBHw5bbvXVV1wxQ1aHS7AP+9nUGeAjz7pzjW1UNhcCjw5iQ3AFcBuzMIeG29m4Ffa2fzL6mqB7fQ9xAGM1wD0H4HDwS+NPHaVNWGtvlQ4D3tNboc2Al4OvAV4H1J3s1grqX/t5U1zEmeWYzXJn4ykHcaWn64PT7CT74O/4fB
EMcv8pNj3g8PLWdLT1pV69pfOiuALwG7AW8Avl9V30uy+2bHewRwGGrbnAh8oapek2QZg/80tuTH/yaSPAHYcWjbQ0PLAY6rqounrdIFqqq+nmR/Bn/dn9SGkYZ/N3eacuctC/C6qtp80tI1Sa4CDgc+n+RtVXXZ5jVU1Qnb+LyzwjOL8XoA+CdJdm9DCb8+wj7fYDCMcU6S3l+pVwBvbOPbS4CXAle3bV8F3skgLK4A3tUeNb124bF5yt4yQv+7gRe05VcDT5yi38XA7yV5IkCSX0yy87aXuXAleRrwg6r6FPABBsN9d/PY6zA8bHgJcOzQvosZ/C69NMk+rW1iGOpi4Li0ceIkz2+PPw/cWVWnARcCvzRFDfOKYTFGVfWPwAkM/gO/BPjaiPt9DfhtBsMbv7CFrp8FbgJuBC4D/riq7m/brmBwXWQtcB2DswvDYvr9KfCfk1zPaGfqHwP+RZIbGQwLPjRFv48zGEK8rg1x/bcRj6/Hey6D6wk3MLg+eBLwH4FTk6xmcGY94SRgcbsIfSPw8qpaz+DjDz7T2j7d+p7IIOxvSnJrW4fBWfwt7fmeA5wzRQ3zitN9SJK6PLOQJHUZFpKkLsNCktRlWEiSugwLSVKXYSGNSZvv6cWzXYc0HQwLaXxeBhgW2i4YFtJm2qy8X0tybgaz816Q5MlJDm6zi97cZht9Uuv/uBlM29Qfbwf+sM0++pIke2YwC/CN7evFbZ8/am8CuyXJOzer4awMZiY+N8khSa5MckeSA1q/SWc+laabYSFN7pnAh6vqnwPfZTAL8FnAG9uMpIuA35tq56q6G/gocEpV/XJVXQGcBnyxqp7HYLqHW5O8ADgKeCGDCet+d2LaCOAZwAcZzDT8LOC3GEwe+S7gfa3PxMynBzCYBfcDTguicTAspMndU1VXtuVPAQczmKn3663tbAZzcW2NVwAfAWgzBT/I4D//z1bVQ1X1fQazEU98vsldVXVzVT0K3ApcWoMpF27msVmCp5r5VJpWzjUjTW7zeXC+w2Ca8MlMxwymkxmeGfjRofVHeex3d6qZT6Vp5ZmFNLmnJ3lRW/4tBtPFL0vyjNb2JgafHwJTz2D6PeDnhtYvpQ1dtZmCd2EwueMR7ZrIzsBr2LoJHyed+VSaboaFNLnbgWOTrAEWM/hgo6MYzAR8M4O/7j/a+k41g+nfAq+ZuMANvAN4edv/WmC/qrqOwbWQqxl8yNHHq+r6rahzqplPpWnlrLPSZtqdTJ+rqufMcinSnOGZhSSpyzMLSVKXZxaSpC7DQpLUZVhIkroMC0lSl2EhSer6/zp2PM9UxRBFAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "643362de-18ca-45c2-95a9-6ca2c19f0144", + "_uuid": "154cad1a97e082dea7fefef0084301c391b78bf9", + "id": "QuXz1WcojSOA" + }, + "source": [ + "#### contact" + ] + }, + { + "cell_type": "code", + "source": [ + "bank_data.contact.value_counts()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qt5xaWmxKhBl", + "outputId": "7bdc5898-e9e4-4ca1-e879-04e5b011dffe" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "cellular 8042\n", + "unknown 2346\n", + "telephone 774\n", + "Name: contact, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 154 + } + ] + }, + { + "cell_type": "code", + "source": [ + "sns.countplot(bank_data['contact'])" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 386 + }, + "id": "Y8g5bX3rLSQD", + "outputId": "33765089-b007-43e6-bf37-d6edc147d994" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning:\n", + "\n", + "Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n", + "\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 155 + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "edf8d6c3-f66a-4521-bb82-13de0e175244", + "_uuid": "7b3cd6910378c11f5edacde330f5a312d0f58d16", + "collapsed": true, + "id": "Y3O7BGy5jSOB" + }, + "outputs": [], + "source": [ + "# Drop 'contact', as every participant has been contacted. \n", + "bank_data.drop('contact', axis=1, inplace=True)" + ] + }, + { + "cell_type": "code", + "source": [ + "bank_data.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "dUzQlsN3oAu4", + "outputId": "981de64f-8d00-4264-a3fd-34089ad41177" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agejobmaritaleducationdefaultbalancehousingloandaymonthdurationcampaignpdayspreviouspoutcomedeposit
059white-collarmarriedsecondaryno2343yesno5may10421-10unknownyes
156white-collarmarriedsecondaryno45nono5may14671-10unknownyes
241technicianmarriedsecondaryno1270yesno5may13891-10unknownyes
355pink-collarmarriedsecondaryno2476yesno5may5791-10unknownyes
454white-collarmarriedtertiaryno184nono5may6732-10unknownyes
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " age job marital education ... pdays previous poutcome deposit\n", + "0 59 white-collar married secondary ... -1 0 unknown yes\n", + "1 56 white-collar married secondary ... -1 0 unknown yes\n", + "2 41 technician married secondary ... -1 0 unknown yes\n", + "3 55 pink-collar married secondary ... -1 0 unknown yes\n", + "4 54 white-collar married tertiary ... -1 0 unknown yes\n", + "\n", + "[5 rows x 16 columns]" + ] + }, + "metadata": {}, + "execution_count": 157 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "167de291-a2df-4371-837b-25cc84121fc9", + "_uuid": "8d519172290ca18fd70843a45677e0524b460a8c", + "id": "Ti0iUujFjSOB" + }, + "source": [ + "#### default" + ] + }, + { + "cell_type": "code", + "source": [ + "bank_data['default'].value_counts()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "w4sEHNguSbDc", + "outputId": "e8a3b862-8066-4d22-c681-6a20660e509c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "no 10994\n", + "yes 168\n", + "Name: default, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 169 + } + ] + }, + { + "cell_type": "code", + "source": [ + "bank_data['default'].value_counts().plot(kind='pie', autopct='%.2f')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 265 + }, + "id": "1aI-eM2uLmnk", + "outputId": "50b1be20-6ec9-4bbf-c880-166304487967" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 168 + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "9445241c-8d2f-4552-a879-f35bd08155ee", + "_uuid": "35a4bc625d393ae0b38076c7bdb88e12be134acf", + "collapsed": true, + "id": "MPbcLqBEjSOB" + }, + "outputs": [], + "source": [ + "# values for \"default\" : yes/no\n", + "bank_data[\"default\"]\n", + "bank_data['default_cat'] = bank_data['default'].map( {'yes':1, 'no':0} )\n", + "bank_data.drop('default', axis=1,inplace = True)\n" + ] + }, + { + "cell_type": "code", + "source": [ + "bank_data.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "_rzwJZgDobOd", + "outputId": "3b1359f9-de6b-4142-a9d7-b1470e96a6e3" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agejobmaritaleducationbalancehousingloandaymonthdurationcampaignpdayspreviouspoutcomedepositdefault_cat
059white-collarmarriedsecondary2343yesno5may10421-10unknownyes0
156white-collarmarriedsecondary45nono5may14671-10unknownyes0
241technicianmarriedsecondary1270yesno5may13891-10unknownyes0
355pink-collarmarriedsecondary2476yesno5may5791-10unknownyes0
454white-collarmarriedtertiary184nono5may6732-10unknownyes0
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " age job marital ... poutcome deposit default_cat\n", + "0 59 white-collar married ... unknown yes 0\n", + "1 56 white-collar married ... unknown yes 0\n", + "2 41 technician married ... unknown yes 0\n", + "3 55 pink-collar married ... unknown yes 0\n", + "4 54 white-collar married ... unknown yes 0\n", + "\n", + "[5 rows x 16 columns]" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "e69fa9cf-4b44-450e-9c11-52edad2336c0", + "_uuid": "71c97178c06dfb20561a6f4813eaf48f16270ed2", + "id": "Lx4MVHu2jSOC" + }, + "source": [ + "#### housing" + ] + }, + { + "cell_type": "code", + "source": [ + "bank_data['housing'].value_counts()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uCckpA0KSoQZ", + "outputId": "5239d7ae-b55f-4e91-de95-2eb178f33555" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "no 5881\n", + "yes 5281\n", + "Name: housing, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 171 + } + ] + }, + { + "cell_type": "code", + "source": [ + "bank_data['housing'].value_counts().plot(kind='pie', autopct='%.2f')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 265 + }, + "id": "z_e6OrCSSmG1", + "outputId": "2f9696dd-1d7c-4460-8334-316ca4621ffe" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 173 + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "9f33486b-6c4c-4a12-9174-54be5f788091", + "_uuid": "5625dd9f23af740ce738cb5f1d45ca18dc4d3fe6", + "collapsed": true, + "id": "LNQ4UaG6jSOC" + }, + "outputs": [], + "source": [ + "# values for \"housing\" : yes/no\n", + "bank_data[\"housing_cat\"]=bank_data['housing'].map({'yes':1, 'no':0})\n", + "bank_data.drop('housing', axis=1,inplace = True)" + ] + }, + { + "cell_type": "code", + "source": [ + "bank_data.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "PcSsNAayojM1", + "outputId": "00cd28fb-faf4-4031-d471-90edeb7d4f2d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agejobmaritaleducationbalanceloandaymonthdurationcampaignpdayspreviouspoutcomedepositdefault_cathousing_cat
059white-collarmarriedsecondary2343no5may10421-10unknownyes01
156white-collarmarriedsecondary45no5may14671-10unknownyes00
241technicianmarriedsecondary1270no5may13891-10unknownyes01
355pink-collarmarriedsecondary2476no5may5791-10unknownyes01
454white-collarmarriedtertiary184no5may6732-10unknownyes00
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " age job marital ... deposit default_cat housing_cat\n", + "0 59 white-collar married ... yes 0 1\n", + "1 56 white-collar married ... yes 0 0\n", + "2 41 technician married ... yes 0 1\n", + "3 55 pink-collar married ... yes 0 1\n", + "4 54 white-collar married ... yes 0 0\n", + "\n", + "[5 rows x 16 columns]" + ] + }, + "metadata": {}, + "execution_count": 31 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "f4be5aa2-9702-4b64-a732-b473ac696631", + "_uuid": "dfae48f26206aa2f27803205e071841fa877307c", + "id": "0gZW1UaujSOC" + }, + "source": [ + "#### loan" + ] + }, + { + "cell_type": "code", + "source": [ + "bank_data['loan'].value_counts()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dtusL89GS1ZS", + "outputId": "b7ec4e88-03e4-4e18-bde2-d70e90b9fab1" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "no 9702\n", + "yes 1460\n", + "Name: loan, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 175 + } + ] + }, + { + "cell_type": "code", + "source": [ + "bank_data['loan'].value_counts().plot(kind='pie', autopct='%.2f')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 265 + }, + "id": "mtcmU4ySS4dS", + "outputId": "7aefbfe1-1238-4f6e-96f9-27dc5f411dcd" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 176 + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "c1b2dc72-ee07-4995-aa5e-e5bbd0a541b8", + "_uuid": "557f3471b7552919b437e703005830585a715e56", + "collapsed": true, + "id": "RveCNESJjSOC" + }, + "outputs": [], + "source": [ + "# values for \"loan\" : yes/no\n", + "bank_data[\"loan_cat\"] = bank_data['loan'].map({'yes':1, 'no':0})\n", + "bank_data.drop('loan', axis=1, inplace=True)" + ] + }, + { + "cell_type": "code", + "source": [ + "bank_data.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 250 + }, + "id": "hb4a6wRBothv", + "outputId": "b35f432d-c24e-4518-8c43-60cfd435dcd3" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agejobmaritaleducationbalancedaymonthdurationcampaignpdayspreviouspoutcomedepositdefault_cathousing_catloan_cat
059white-collarmarriedsecondary23435may10421-10unknownyes010
156white-collarmarriedsecondary455may14671-10unknownyes000
241technicianmarriedsecondary12705may13891-10unknownyes010
355pink-collarmarriedsecondary24765may5791-10unknownyes010
454white-collarmarriedtertiary1845may6732-10unknownyes000
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " age job marital ... default_cat housing_cat loan_cat\n", + "0 59 white-collar married ... 0 1 0\n", + "1 56 white-collar married ... 0 0 0\n", + "2 41 technician married ... 0 1 0\n", + "3 55 pink-collar married ... 0 1 0\n", + "4 54 white-collar married ... 0 0 0\n", + "\n", + "[5 rows x 16 columns]" + ] + }, + "metadata": {}, + "execution_count": 178 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "422662b8-94f4-4b1f-a61c-71dca5627c7c", + "_uuid": "f94605bb3c4f22900794ad6b1d066d3808d9d5b1", + "id": "huql5gAajSOC" + }, + "source": [ + "#### month, day " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "a004d399-5d98-4767-a629-4008e834a501", + "_uuid": "24b3d3f4534ea7b714743123dc7f1186d6c6165a", + "collapsed": true, + "id": "ny9n4xjcjSOD" + }, + "outputs": [], + "source": [ + "# day : last contact day of the month\n", + "# month: last contact month of year\n", + "# Drop 'month' and 'day' \n", + "bank_data.drop('month', axis=1, inplace=True)\n", + "bank_data.drop('day', axis=1, inplace=True)" + ] + }, + { + "cell_type": "code", + "source": [ + "bank_data.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "3w49soWIo3Uy", + "outputId": "bbf35cd6-69ee-49bf-dcb1-14c23fbe2def" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agejobmaritaleducationbalancedurationcampaignpdayspreviouspoutcomedepositdefault_cathousing_catloan_cat
059white-collarmarriedsecondary234310421-10unknownyes010
156white-collarmarriedsecondary4514671-10unknownyes000
241technicianmarriedsecondary127013891-10unknownyes010
355pink-collarmarriedsecondary24765791-10unknownyes010
454white-collarmarriedtertiary1846732-10unknownyes000
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " age job marital ... default_cat housing_cat loan_cat\n", + "0 59 white-collar married ... 0 1 0\n", + "1 56 white-collar married ... 0 0 0\n", + "2 41 technician married ... 0 1 0\n", + "3 55 pink-collar married ... 0 1 0\n", + "4 54 white-collar married ... 0 0 0\n", + "\n", + "[5 rows x 14 columns]" + ] + }, + "metadata": {}, + "execution_count": 180 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "3114fee4-f7f6-4bb3-9e9e-30080deacae8", + "_uuid": "082214ade8f57c636812167b6b6f11a291209234", + "id": "j3rFMDx0jSOD" + }, + "source": [ + "#### deposit " + ] + }, + { + "cell_type": "code", + "source": [ + "bank_data['deposit'].value_counts()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9uOE6mPPTJNH", + "outputId": "d042f3d9-211a-4483-a35f-eb7c382e78e9" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "no 5873\n", + "yes 5289\n", + "Name: deposit, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 181 + } + ] + }, + { + "cell_type": "code", + "source": [ + "bank_data['deposit'].value_counts().plot(kind='pie', autopct='%.2f')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 265 + }, + "id": "nF-vlFx8TNXo", + "outputId": "83547163-8d99-4e2c-e5a6-384504438eb4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 182 + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "824acb7c-9ebb-4ec3-abc2-91b3a8dfbba8", + "_uuid": "8983691ec985826964e224d851c47fa53d5189a3", + "collapsed": true, + "id": "KCXAige3jSOD" + }, + "outputs": [], + "source": [ + "# values for \"deposit\" : yes/no\n", + "bank_data[\"deposit_cat\"] = bank_data['deposit'].map({'yes':1, 'no':0})\n", + "bank_data.drop('deposit', axis=1, inplace=True)" + ] + }, + { + "cell_type": "code", + "source": [ + "bank_data.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "Nxr19rhupBhD", + "outputId": "a1d1a23d-4586-4d0a-d561-b6f1adfe75ff" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agejobmaritaleducationbalancedurationcampaignpdayspreviouspoutcomedefault_cathousing_catloan_catdeposit_cat
059white-collarmarriedsecondary234310421-10unknown0101
156white-collarmarriedsecondary4514671-10unknown0001
241technicianmarriedsecondary127013891-10unknown0101
355pink-collarmarriedsecondary24765791-10unknown0101
454white-collarmarriedtertiary1846732-10unknown0001
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " age job marital ... housing_cat loan_cat deposit_cat\n", + "0 59 white-collar married ... 1 0 1\n", + "1 56 white-collar married ... 0 0 1\n", + "2 41 technician married ... 1 0 1\n", + "3 55 pink-collar married ... 1 0 1\n", + "4 54 white-collar married ... 0 0 1\n", + "\n", + "[5 rows x 14 columns]" + ] + }, + "metadata": {}, + "execution_count": 184 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "6a07560d-d8d6-44a7-bc40-e3fefe56c07a", + "_uuid": "ae91146f0a733b46f2ffc929e6c15c637b5a5464", + "id": "o8-RUiIhjSOD" + }, + "source": [ + "#### pdays" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "a0a43966-0a0a-4c87-9fbb-98753440475e", + "_uuid": "a9285460820d03a8a46b00efa4ab5c0b7bfdfdba", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_MnWo1CejSOD", + "outputId": "738d05d2-484f-4bff-d4ff-dea5a5508222" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Customers that have not been contacted before: 8324\n", + "Maximum values on padys : 854\n" + ] + } + ], + "source": [ + "# pdays: number of days that passed by after the client was last contacted from a previous campaign\n", + "# -1 means client was not previously contacted\n", + "\n", + "print(\"Customers that have not been contacted before:\", len(bank_data[bank_data.pdays==-1]))\n", + "print(\"Maximum values on padys :\", bank_data['pdays'].max())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "27c4a40d-a6e7-455f-85ef-db661c2f3bf6", + "_uuid": "4a5ce52f6514b2c6a98a05496f464958bb08e6d1", + "collapsed": true, + "id": "MQmZyxsHjSOE" + }, + "outputs": [], + "source": [ + "# Map pdays=-1 into a large value (10000 is used) to indicate that it is so far in the past that it has no effect\n", + "bank_data.loc[bank_data['pdays'] == -1, 'pdays'] = 10000" + ] + }, + { + "cell_type": "code", + 
"source": [ + "bank_data.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "KywPgIrcpYlU", + "outputId": "df9050cf-75e9-4ec1-a82f-2c27633a8165" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agejobmaritaleducationbalancedurationcampaignpdayspreviouspoutcomedefault_cathousing_catloan_catdeposit_cat
059white-collarmarriedsecondary234310421100000unknown0101
156white-collarmarriedsecondary4514671100000unknown0001
241technicianmarriedsecondary127013891100000unknown0101
355pink-collarmarriedsecondary24765791100000unknown0101
454white-collarmarriedtertiary1846732100000unknown0001
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " age job marital ... housing_cat loan_cat deposit_cat\n", + "0 59 white-collar married ... 1 0 1\n", + "1 56 white-collar married ... 0 0 1\n", + "2 41 technician married ... 1 0 1\n", + "3 55 pink-collar married ... 1 0 1\n", + "4 54 white-collar married ... 0 0 1\n", + "\n", + "[5 rows x 14 columns]" + ] + }, + "metadata": {}, + "execution_count": 189 + } + ] + }, + { + "cell_type": "code", + "source": [ + "1/bank_data.pdays" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cQOgRyPTTz3a", + "outputId": "a35d7732-dfd3-421c-a3ad-843027a873c2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 0.000100\n", + "1 0.000100\n", + "2 0.000100\n", + "3 0.000100\n", + "4 0.000100\n", + " ... \n", + "11157 0.000100\n", + "11158 0.000100\n", + "11159 0.000100\n", + "11160 0.005814\n", + "11161 0.000100\n", + "Name: pdays, Length: 11162, dtype: float64" + ] + }, + "metadata": {}, + "execution_count": 195 + } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "dc334f4c-7756-42d0-884e-c87c8c205b72", + "_uuid": "32aed7514950db77534edf31950599dd2de13174", + "collapsed": true, + "id": "E4zzaRhgjSOE" + }, + "outputs": [], + "source": [ + "# Create a new column: recent_pdays \n", + "bank_data['recent_pdays'] = np.where(bank_data['pdays'], 1/bank_data.pdays, 1/bank_data.pdays)\n", + "\n", + "# Drop 'pdays'\n", + "bank_data.drop('pdays', axis=1, inplace = True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "d081cff8-2b59-449e-aa9d-c47b95bf9f3b", + "_uuid": "0fd651329be4e215dfe12e7d0dcd825081fc22bb", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/", + "height": 250 + }, + "id": "-56l68DNjSOE", + "outputId": "bdef22da-4791-4b50-86f1-50cef251ccfb" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + 
"text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agejobmaritaleducationbalancedurationcampaignpreviouspoutcomedefault_cathousing_catloan_catdeposit_catrecent_pdays
1115733blue-collarsingleprimary125710unknown01000.000100
1115839pink-collarmarriedsecondary7338340unknown00000.000100
1115932techniciansinglesecondary2915620unknown00000.000100
1116043technicianmarriedsecondary0925failure00100.005814
1116134technicianmarriedsecondary062810unknown00000.000100
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " age job marital ... loan_cat deposit_cat recent_pdays\n", + "11157 33 blue-collar single ... 0 0 0.000100\n", + "11158 39 pink-collar married ... 0 0 0.000100\n", + "11159 32 technician single ... 0 0 0.000100\n", + "11160 43 technician married ... 1 0 0.005814\n", + "11161 34 technician married ... 0 0 0.000100\n", + "\n", + "[5 rows x 14 columns]" + ] + }, + "metadata": {}, + "execution_count": 198 + } + ], + "source": [ + "bank_data.tail()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "675e8d8f-2e23-4976-a7e9-fdb0d4983393", + "_uuid": "6c0309015c140816d4fc83ce6af0d47daa6ac0f0", + "id": "2xP7i2LBjSOF" + }, + "source": [ + "### Convert to dummy values " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "65a1ee10-aa81-4e44-b159-88f426ad0ae3", + "_uuid": "63ac123483934cbc0e3703950e9cd7860184e985", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/", + "height": 288 + }, + "id": "eam_AkO4jSOF", + "outputId": "4510be08-9135-4658-de14-bc3ce46ef180" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agebalancedurationcampaignpreviousdefault_cathousing_catloan_catdeposit_catrecent_pdaysjob_blue-collarjob_entrepreneurjob_otherjob_pink-collarjob_self-employedjob_technicianjob_white-collarmarital_divorcedmarital_marriedmarital_singleeducation_primaryeducation_secondaryeducation_tertiaryeducation_unknownpoutcome_failurepoutcome_successpoutcome_unknown
059234310421001010.000100000010100100001
1564514671000010.000100000010100100001
241127013891001010.000100000100100100001
35524765791001010.000100010000100100001
4541846732000010.000100000010100010001
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " age balance duration ... poutcome_failure poutcome_success poutcome_unknown\n", + "0 59 2343 1042 ... 0 0 1\n", + "1 56 45 1467 ... 0 0 1\n", + "2 41 1270 1389 ... 0 0 1\n", + "3 55 2476 579 ... 0 0 1\n", + "4 54 184 673 ... 0 0 1\n", + "\n", + "[5 rows x 27 columns]" + ] + }, + "metadata": {}, + "execution_count": 199 + } + ], + "source": [ + "# Convert categorical variables to dummies\n", + "bank_with_dummies = pd.get_dummies(data=bank_data, columns = ['job', 'marital', 'education', 'poutcome'], \\\n", + " prefix = ['job', 'marital', 'education', 'poutcome'])\n", + "bank_with_dummies.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "dc46d20e-db1f-41cc-a0c4-bea3f800235e", + "_uuid": "5fae26f3d9d4c343d75d90163ebb54f64c4d798e", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "t6jdyBnxjSOF", + "outputId": "605b712c-c72a-4e2e-8a20-f93ea0f391c0" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(11162, 27)" + ] + }, + "metadata": {}, + "execution_count": 200 + } + ], + "source": [ + "bank_with_dummies.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "8daac786-1fcd-4dd5-b789-8dbb96011c44", + "_uuid": "b9018d84ec2eae5368860f237e47e00473deeb05", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/", + "height": 382 + }, + "id": "BcTsdw9_jSOF", + "outputId": "a45a5f6f-202a-45f4-c107-fc1a2e9ea9ad" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agebalancedurationcampaignpreviousdefault_cathousing_catloan_catdeposit_catrecent_pdaysjob_blue-collarjob_entrepreneurjob_otherjob_pink-collarjob_self-employedjob_technicianjob_white-collarmarital_divorcedmarital_marriedmarital_singleeducation_primaryeducation_secondaryeducation_tertiaryeducation_unknownpoutcome_failurepoutcome_successpoutcome_unknown
count11162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.00000011162.000000
mean41.2319481528.538524371.9938182.5084210.8325570.0150510.4731230.1308010.4738400.0031240.1741620.0293850.1402080.1072390.0362840.1633220.3494000.1158390.5689840.3151760.1343850.4905930.3304960.0445260.1100160.0959510.794033
std11.9133693225.413326347.1283862.7220772.2920070.1217610.4992990.3371980.4993380.0306860.3792660.1688920.3472180.3094310.1870040.3696760.4768020.3200470.4952410.4646070.3410800.4999340.4704130.2062700.3129240.2945370.404424
min18.000000-6847.0000002.0000001.0000000.0000000.0000000.0000000.0000000.0000000.0001000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
25%32.000000122.000000138.0000001.0000000.0000000.0000000.0000000.0000000.0000000.0001000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000001.000000
50%39.000000550.000000255.0000002.0000000.0000000.0000000.0000000.0000000.0000000.0001000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000001.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000001.000000
75%49.0000001708.000000496.0000003.0000001.0000000.0000001.0000000.0000001.0000000.0019190.0000000.0000000.0000000.0000000.0000000.0000001.0000000.0000001.0000001.0000000.0000001.0000001.0000000.0000000.0000000.0000001.000000
max95.00000081204.0000003881.00000063.00000058.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " age balance ... poutcome_success poutcome_unknown\n", + "count 11162.000000 11162.000000 ... 11162.000000 11162.000000\n", + "mean 41.231948 1528.538524 ... 0.095951 0.794033\n", + "std 11.913369 3225.413326 ... 0.294537 0.404424\n", + "min 18.000000 -6847.000000 ... 0.000000 0.000000\n", + "25% 32.000000 122.000000 ... 0.000000 1.000000\n", + "50% 39.000000 550.000000 ... 0.000000 1.000000\n", + "75% 49.000000 1708.000000 ... 0.000000 1.000000\n", + "max 95.000000 81204.000000 ... 1.000000 1.000000\n", + "\n", + "[8 rows x 27 columns]" + ] + }, + "metadata": {}, + "execution_count": 201 + } + ], + "source": [ + "bank_with_dummies.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "e1cc517d-cc97-4eee-a258-8c554ed95332", + "_uuid": "beef0817aff3dbd8c920a4cd91bc6e56f00ecd5a", + "id": "Y_KjIwWtjSOG" + }, + "source": [ + "### Observations on whole population" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "e49e7895-a6ac-4b92-9536-f341621d4604", + "_uuid": "3ace943aed1952f64db8239a3e3a5ebdb39dfdc3", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/", + "height": 279 + }, + "id": "VhtBIQAZjSOG", + "outputId": "ad0d2c4c-a930-4f8c-84ca-5fd37d7a5ffd" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "# Scatterplot showing age and balance\n", + "bank_with_dummies.plot(kind='scatter', x='age', y='balance');\n", + "\n", + "# Across all ages, majority of people have savings of less than 20000." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "46f48fc7-340c-48bb-8f52-7f9a43d03430", + "_uuid": "f5bccfdf19b6efdaccf2663c114f85ab910fa4a4", + "id": "fwi0C0L9jSOG" + }, + "source": [ + "#### Analysis on people who sign up for a term deposite" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "52d4b887-1b35-434d-adf2-243edd1b3347", + "_uuid": "0be3fa7ed1542d993bb5c8bc36b36d6ed7ca8092", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/", + "height": 382 + }, + "id": "u_p522EUjSOG", + "outputId": "d692072d-97cf-4e24-a97b-9068446f2e91" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agebalancedurationcampaignpreviousdefault_cathousing_catloan_catdeposit_catrecent_pdaysjob_blue-collarjob_entrepreneurjob_otherjob_pink-collarjob_self-employedjob_technicianjob_white-collarmarital_divorcedmarital_marriedmarital_singleeducation_primaryeducation_secondaryeducation_tertiaryeducation_unknownpoutcome_failurepoutcome_successpoutcome_unknown
count5289.0000005289.0000005289.0000005289.0000005289.0000005289.0000005289.0000005289.0000005289.05289.0000005289.0000005289.0000005289.0000005289.0000005289.0000005289.0000005289.0000005289.0000005289.0000005289.0000005289.0000005289.0000005289.0000005289.0000005289.0000005289.0000005289.000000
mean41.6700701804.267915537.2945742.1410471.1703540.0098320.3658540.0915111.00.0042380.1338630.0232560.1930420.0903760.0353560.1588200.3652860.1176030.5208920.3615050.1117410.4632260.3773870.0476460.1168460.1849120.698242
std13.4977813501.104777392.5252621.9218262.5532720.0986760.4817140.2883610.00.0356860.3405370.1507290.3947230.2867470.1846960.3655430.4815560.3221670.4996110.4804820.3150780.4986930.4847790.2130360.3212670.3882630.459064
min18.000000-3058.0000008.0000001.0000000.0000000.0000000.0000000.0000001.00.0001000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
25%31.000000210.000000244.0000001.0000000.0000000.0000000.0000000.0000001.00.0001000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
50%38.000000733.000000426.0000002.0000000.0000000.0000000.0000000.0000001.00.0001000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000001.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000001.000000
75%50.0000002159.000000725.0000003.0000001.0000000.0000001.0000000.0000001.00.0051280.0000000.0000000.0000000.0000000.0000000.0000001.0000000.0000001.0000001.0000000.0000001.0000001.0000000.0000000.0000000.0000001.000000
max95.00000081204.0000003881.00000032.00000058.0000001.0000001.0000001.0000001.01.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " age balance ... poutcome_success poutcome_unknown\n", + "count 5289.000000 5289.000000 ... 5289.000000 5289.000000\n", + "mean 41.670070 1804.267915 ... 0.184912 0.698242\n", + "std 13.497781 3501.104777 ... 0.388263 0.459064\n", + "min 18.000000 -3058.000000 ... 0.000000 0.000000\n", + "25% 31.000000 210.000000 ... 0.000000 0.000000\n", + "50% 38.000000 733.000000 ... 0.000000 1.000000\n", + "75% 50.000000 2159.000000 ... 0.000000 1.000000\n", + "max 95.000000 81204.000000 ... 1.000000 1.000000\n", + "\n", + "[8 rows x 27 columns]" + ] + }, + "metadata": {}, + "execution_count": 204 + } + ], + "source": [ + "# People who sign up to a term deposite\n", + "bank_with_dummies[bank_data.deposit_cat == 1].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "25c8616f-805c-4abf-b792-4b2e8f39cfcf", + "_uuid": "3d999f6b3775dcc7b25993fcd6ccad1838d8a239", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ix0iu-GWjSOH", + "outputId": "4ffb96cf-65b0-4b99-bc9c-9a1d519c59ab" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "265" + ] + }, + "metadata": {}, + "execution_count": 205 + } + ], + "source": [ + "# People signed up to a term deposite having a personal loan (loan_cat) and housing loan (housing_cat)\n", + "len(bank_with_dummies[(bank_with_dummies.deposit_cat == 1) & (bank_with_dummies.loan_cat) & (bank_with_dummies.housing_cat)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "ecb235b0-9f6b-4a35-9a71-efd7525b4836", + "_uuid": "33866e95d069281a68c5f0bcfaa272603e665390", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uUGjyL4hjSOH", + "outputId": "e947e93e-edbc-49f2-a782-710e43e03b96" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "52" + ] + }, + "metadata": {}, + 
"execution_count": 206 + } + ], + "source": [ + "# People signed up to a term deposite with a credit default \n", + "len(bank_with_dummies[(bank_with_dummies.deposit_cat == 1) & (bank_with_dummies.default_cat ==1)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "fae049f5-fa22-4db2-9e65-2b0088ed6c2c", + "_uuid": "037a420ecb420ad6b97029219d36d4f9147d52a6", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/", + "height": 405 + }, + "id": "MWb3Oax-jSOH", + "outputId": "351d9998-8f9f-4e2c-d266-23c9615d4f23" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 207 + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "# Bar chart of job Vs deposite\n", + "plt.figure(figsize = (10,6))\n", + "sns.barplot(x='job', y = 'deposit_cat', data = bank_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "1a35a8e5-3df3-470c-9bf1-71a476f587e6", + "_uuid": "7f5cb673253553269fcb783d941d7b37499b9a3e", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/", + "height": 405 + }, + "id": "_DIK-kl5jSOH", + "outputId": "871b5cd6-3191-4c02-e4d1-caf93b51cae1" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 208 + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "# Bar chart of \"previous outcome\" Vs \"call duration\"\n", + "\n", + "plt.figure(figsize = (10,6))\n", + "sns.barplot(x='poutcome', y = 'duration', data = bank_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "24492c44-5b2a-4878-9236-df9297680902", + "_uuid": "d583dd9fbc7df31d950d78bcb62f2156a6c14ed3", + "id": "LBf4DIGEjSOI" + }, + "source": [ + "> ## Classification" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "9365fde6-e602-4996-a79a-2d7e4ec3bee2", + "_uuid": "4482103b89dd40dbb5ac9bbafee6e85da23c27c3", + "collapsed": true, + "id": "N3604bVcjSOI" + }, + "outputs": [], + "source": [ + "# make a copy\n", + "bankcl = bank_with_dummies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "edf85d97-fec6-4f90-a3d0-02a1ec094a08", + "_uuid": "ff08aff610ab1676acc902da63dc35ae1c321673", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/", + "height": 977 + }, + "id": "BPeiKPbEjSOI", + "outputId": "63503e02-67e8-4379-9423-837932279bee" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agebalancedurationcampaignpreviousdefault_cathousing_catloan_catdeposit_catrecent_pdaysjob_blue-collarjob_entrepreneurjob_otherjob_pink-collarjob_self-employedjob_technicianjob_white-collarmarital_divorcedmarital_marriedmarital_singleeducation_primaryeducation_secondaryeducation_tertiaryeducation_unknownpoutcome_failurepoutcome_successpoutcome_unknown
age1.0000000.1123000.000189-0.0052780.020169-0.011425-0.168700-0.0314180.0349010.019102-0.0665670.0241760.296418-0.027942-0.023163-0.082716-0.0801220.1863490.318436-0.4677990.231150-0.094400-0.1013720.077761-0.0080710.062114-0.038992
balance0.1123001.0000000.022436-0.0138940.030805-0.060954-0.077092-0.0845890.081129-0.004379-0.0462200.0050390.050744-0.0410630.0202640.0038020.013780-0.0175860.025431-0.014994-0.000673-0.0706090.0691280.0145960.0016950.045603-0.034524
duration0.0001890.0224361.000000-0.041557-0.026716-0.0097600.035051-0.0019140.451919-0.0148680.029986-0.0009080.0106800.0053450.013506-0.010440-0.0319800.021364-0.0361790.0238470.0134050.003820-0.006813-0.015887-0.033966-0.0225780.042725
campaign-0.005278-0.013894-0.0415571.000000-0.0496990.0309750.0066600.034722-0.128081-0.0262960.0055220.013883-0.0502120.0119580.0017760.0217380.001944-0.0068280.047722-0.0461650.019915-0.013834-0.0054270.012976-0.080188-0.0918070.128907
previous0.0201690.030805-0.026716-0.0496991.000000-0.035273-0.000840-0.0226680.1398670.122076-0.039939-0.0224700.031191-0.028623-0.0023380.0020350.034929-0.026566-0.0051760.023817-0.024852-0.0046200.028146-0.0118980.3358700.325477-0.496921
default_cat-0.011425-0.060954-0.0097600.030975-0.0352731.0000000.0110760.076434-0.040680-0.0112900.0227790.022060-0.018130-0.0071730.0074930.003109-0.0134250.019633-0.006819-0.0062550.013858-0.000618-0.0117680.005421-0.024650-0.0402720.048403
housing_cat-0.168700-0.0770920.0350510.006660-0.0008400.0110761.0000000.076761-0.203888-0.0293500.1898480.011492-0.2333090.043884-0.0169030.006551-0.0121110.0074300.036305-0.0438170.0170020.118514-0.114955-0.0531910.087741-0.1362990.031375
loan_cat-0.031418-0.084589-0.0019140.034722-0.0226680.0764340.0767611.000000-0.110580-0.0126970.0579560.042631-0.0961960.0149690.0042990.006864-0.0078710.0264630.044148-0.0652880.0068540.079583-0.067513-0.0502490.006264-0.0803700.053686
deposit_cat0.0349010.0811290.451919-0.1280810.139867-0.040680-0.203888-0.1105801.0000000.034457-0.100840-0.0344430.144408-0.051717-0.004707-0.0115570.0316210.005228-0.0921570.094632-0.063002-0.0519520.0945980.0143550.0207140.286642-0.224785
recent_pdays0.019102-0.004379-0.014868-0.0262960.122076-0.011290-0.029350-0.0126970.0344571.000000-0.0185140.0062510.024356-0.001183-0.008226-0.0074120.004516-0.0202530.0095830.003736-0.007034-0.0171290.0173460.0135900.0514220.119598-0.126890
job_blue-collar-0.066567-0.0462200.0299860.005522-0.0399390.0227790.1898480.057956-0.100840-0.0185141.000000-0.079905-0.185447-0.159162-0.089107-0.202896-0.336538-0.0562400.109188-0.0776450.2997370.076687-0.298548-0.000640-0.018022-0.0774220.070330
job_entrepreneur0.0241760.005039-0.0009080.013883-0.0224700.0220600.0114920.042631-0.0344430.006251-0.0799051.000000-0.070264-0.060305-0.033762-0.076875-0.1275110.0066380.050746-0.058665-0.004788-0.0211320.026612-0.001555-0.001840-0.0350720.026966
job_other0.2964180.0507440.010680-0.0502120.031191-0.018130-0.233309-0.0961960.1444080.024356-0.185447-0.0702641.000000-0.139958-0.078356-0.178415-0.2959330.032824-0.0309820.0104130.114003-0.020532-0.1103830.112986-0.0108650.099733-0.064228
job_pink-collar-0.027942-0.0410630.0053450.011958-0.028623-0.0071730.0438840.014969-0.051717-0.001183-0.159162-0.060305-0.1399581.000000-0.067250-0.153127-0.2539880.0256400.007558-0.0257180.0561500.137129-0.184418-0.004629-0.010816-0.0303310.030459
job_self-employed-0.0231630.0202640.0135060.001776-0.0023380.007493-0.0169030.004299-0.004707-0.008226-0.089107-0.033762-0.078356-0.0672501.000000-0.085728-0.142196-0.011849-0.0081640.016864-0.037121-0.0600800.097929-0.016336-0.010039-0.0013990.008786
job_technician-0.0827160.003802-0.0104400.0217380.0020350.0031090.0065510.006864-0.011557-0.007412-0.202896-0.076875-0.178415-0.153127-0.0857281.000000-0.323778-0.005434-0.0524920.059696-0.1449480.152542-0.041988-0.0342760.005763-0.0147440.006279
job_white-collar-0.0801220.013780-0.0319800.0019440.034929-0.013425-0.012111-0.0078710.0316210.004516-0.336538-0.127511-0.295933-0.253988-0.142196-0.3237781.0000000.010701-0.0432700.038752-0.229245-0.2222610.422261-0.0452330.0293870.033044-0.046804
marital_divorced0.186349-0.0175860.021364-0.006828-0.0265660.0196330.0074300.0264630.005228-0.020253-0.0562400.0066380.0328240.025640-0.011849-0.0054340.0107011.000000-0.415878-0.2455560.0248210.009891-0.024597-0.008920-0.026169-0.0181200.033445
marital_married0.3184360.025431-0.0361790.047722-0.005176-0.0068190.0363050.044148-0.0921570.0095830.1091880.050746-0.0309820.007558-0.008164-0.052492-0.043270-0.4158781.000000-0.7794550.1302320.001536-0.0984490.0054510.007682-0.0100630.001384
marital_single-0.467799-0.0149940.023847-0.0461650.023817-0.006255-0.043817-0.0652880.0946320.003736-0.077645-0.0586650.010413-0.0257180.0168640.0596960.038752-0.245556-0.7794551.000000-0.155917-0.0084500.1218840.0003340.0098380.023208-0.024514
education_primary0.231150-0.0006730.0134050.019915-0.0248520.0138580.0170020.006854-0.063002-0.0070340.299737-0.0047880.1140030.056150-0.037121-0.144948-0.2292450.0248210.130232-0.1559171.000000-0.386670-0.276834-0.085057-0.026044-0.0498790.056477
education_secondary-0.094400-0.0706090.003820-0.013834-0.004620-0.0006180.1185140.079583-0.051952-0.0171290.076687-0.021132-0.0205320.137129-0.0600800.152542-0.2222610.0098910.001536-0.008450-0.3866701.000000-0.689501-0.2118490.010625-0.0294660.013238
education_tertiary-0.1013720.069128-0.006813-0.0054270.028146-0.011768-0.114955-0.0675130.0945980.017346-0.2985480.026612-0.110383-0.1844180.097929-0.0419880.422261-0.024597-0.0984490.121884-0.276834-0.6895011.000000-0.1516720.0122650.059518-0.052836
education_unknown0.0777610.014596-0.0158870.012976-0.0118980.005421-0.053191-0.0502490.0143550.013590-0.000640-0.0015550.112986-0.004629-0.016336-0.034276-0.045233-0.0089200.0054510.000334-0.085057-0.211849-0.1516721.000000-0.0106580.018158-0.004978
poutcome_failure-0.0080710.001695-0.033966-0.0801880.335870-0.0246500.0877410.0062640.0207140.051422-0.018022-0.001840-0.010865-0.010816-0.0100390.0057630.029387-0.0261690.0076820.009838-0.0260440.0106250.012265-0.0106581.000000-0.114542-0.690332
poutcome_success0.0621140.045603-0.022578-0.0918070.325477-0.040272-0.136299-0.0803700.2866420.119598-0.077422-0.0350720.099733-0.030331-0.001399-0.0147440.033044-0.018120-0.0100630.023208-0.049879-0.0294660.0595180.018158-0.1145421.000000-0.639659
poutcome_unknown-0.038992-0.0345240.0427250.128907-0.4969210.0484030.0313750.053686-0.224785-0.1268900.0703300.026966-0.0642280.0304590.0087860.006279-0.0468040.0334450.001384-0.0245140.0564770.013238-0.052836-0.004978-0.690332-0.6396591.000000
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " age balance ... poutcome_success poutcome_unknown\n", + "age 1.000000 0.112300 ... 0.062114 -0.038992\n", + "balance 0.112300 1.000000 ... 0.045603 -0.034524\n", + "duration 0.000189 0.022436 ... -0.022578 0.042725\n", + "campaign -0.005278 -0.013894 ... -0.091807 0.128907\n", + "previous 0.020169 0.030805 ... 0.325477 -0.496921\n", + "default_cat -0.011425 -0.060954 ... -0.040272 0.048403\n", + "housing_cat -0.168700 -0.077092 ... -0.136299 0.031375\n", + "loan_cat -0.031418 -0.084589 ... -0.080370 0.053686\n", + "deposit_cat 0.034901 0.081129 ... 0.286642 -0.224785\n", + "recent_pdays 0.019102 -0.004379 ... 0.119598 -0.126890\n", + "job_blue-collar -0.066567 -0.046220 ... -0.077422 0.070330\n", + "job_entrepreneur 0.024176 0.005039 ... -0.035072 0.026966\n", + "job_other 0.296418 0.050744 ... 0.099733 -0.064228\n", + "job_pink-collar -0.027942 -0.041063 ... -0.030331 0.030459\n", + "job_self-employed -0.023163 0.020264 ... -0.001399 0.008786\n", + "job_technician -0.082716 0.003802 ... -0.014744 0.006279\n", + "job_white-collar -0.080122 0.013780 ... 0.033044 -0.046804\n", + "marital_divorced 0.186349 -0.017586 ... -0.018120 0.033445\n", + "marital_married 0.318436 0.025431 ... -0.010063 0.001384\n", + "marital_single -0.467799 -0.014994 ... 0.023208 -0.024514\n", + "education_primary 0.231150 -0.000673 ... -0.049879 0.056477\n", + "education_secondary -0.094400 -0.070609 ... -0.029466 0.013238\n", + "education_tertiary -0.101372 0.069128 ... 0.059518 -0.052836\n", + "education_unknown 0.077761 0.014596 ... 0.018158 -0.004978\n", + "poutcome_failure -0.008071 0.001695 ... -0.114542 -0.690332\n", + "poutcome_success 0.062114 0.045603 ... 1.000000 -0.639659\n", + "poutcome_unknown -0.038992 -0.034524 ... 
-0.639659 1.000000\n", + "\n", + "[27 rows x 27 columns]" + ] + }, + "metadata": {}, + "execution_count": 210 + } + ], + "source": [ + "# The Correltion matrix\n", + "corr = bankcl.corr()\n", + "corr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "2eb696c8-d468-4642-b7e6-ef3deca070ac", + "_uuid": "0f56d9c3946d4a50e0048c89ae8373e2204bc61a", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/", + "height": 624 + }, + "id": "QiVzvS2SjSOI", + "outputId": "2753b00b-57c6-4d22-8fed-d35c42098a73" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Heatmap of Correlation Matrix')" + ] + }, + "metadata": {}, + "execution_count": 213 + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "# Heatmap\n", + "plt.figure(figsize = (10,10))\n", + "cmap = sns.diverging_palette(220, 10, as_cmap=True)\n", + "sns.heatmap(corr, xticklabels=corr.columns.values, yticklabels=corr.columns.values, cmap=cmap, vmax=.3, center=0, square=True, linewidths=.5, cbar_kws={\"shrink\": .82})\n", + "plt.title('Heatmap of Correlation Matrix')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "73994c65-9028-4ddc-98a1-5b7cdb10a113", + "_uuid": "ff6ca40f9fd1e7710aa3b0c0ad020ba73aa34a5e", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/", + "height": 865 + }, + "id": "eAzYBXGqjSOJ", + "outputId": "f5c58f5d-34b5-4cab-a678-7b6d7882c29d" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
deposit_cat
duration0.451919
poutcome_success0.286642
job_other0.144408
previous0.139867
marital_single0.094632
education_tertiary0.094598
balance0.081129
age0.034901
recent_pdays0.034457
job_white-collar0.031621
poutcome_failure0.020714
education_unknown0.014355
marital_divorced0.005228
job_self-employed-0.004707
job_technician-0.011557
job_entrepreneur-0.034443
default_cat-0.040680
job_pink-collar-0.051717
education_secondary-0.051952
education_primary-0.063002
marital_married-0.092157
job_blue-collar-0.100840
loan_cat-0.110580
campaign-0.128081
housing_cat-0.203888
poutcome_unknown-0.224785
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " deposit_cat\n", + "duration 0.451919\n", + "poutcome_success 0.286642\n", + "job_other 0.144408\n", + "previous 0.139867\n", + "marital_single 0.094632\n", + "education_tertiary 0.094598\n", + "balance 0.081129\n", + "age 0.034901\n", + "recent_pdays 0.034457\n", + "job_white-collar 0.031621\n", + "poutcome_failure 0.020714\n", + "education_unknown 0.014355\n", + "marital_divorced 0.005228\n", + "job_self-employed -0.004707\n", + "job_technician -0.011557\n", + "job_entrepreneur -0.034443\n", + "default_cat -0.040680\n", + "job_pink-collar -0.051717\n", + "education_secondary -0.051952\n", + "education_primary -0.063002\n", + "marital_married -0.092157\n", + "job_blue-collar -0.100840\n", + "loan_cat -0.110580\n", + "campaign -0.128081\n", + "housing_cat -0.203888\n", + "poutcome_unknown -0.224785" + ] + }, + "metadata": {}, + "execution_count": 214 + } + ], + "source": [ + "# Extract the deposte_cat column (the dependent variable)\n", + "corr_deposite = pd.DataFrame(corr['deposit_cat'].drop('deposit_cat'))\n", + "corr_deposite.sort_values(by = 'deposit_cat', ascending = False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "c281ea25-423f-4477-bab5-e1c262d20931", + "_uuid": "0394776ec1c21f2c30fc66a1ce7a22c7a5e9df5f", + "id": "PkC-G_rfjSOJ" + }, + "source": [ + "> ## Build the Data Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "5675ce89-28f8-409d-9387-ce4641bc4a7b", + "_uuid": "3b14efa859cd38fce1e118ceae7144d8563dc014", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "emXrBWQwjSOJ", + "outputId": "2617badf-d078-412b-91ed-97e231bca3ac" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:2: FutureWarning:\n", + "\n", + "In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will 
be keyword-only\n", + "\n" + ] + } + ], + "source": [ + "# Train-Test split: 20% test data\n", + "# Drop the target via `columns=`: passing the axis positionally is deprecated and raises on pandas >= 2.0\n", + "data_drop_deposite = bankcl.drop(columns='deposit_cat')\n", + "label = bankcl.deposit_cat\n", + "data_train, data_test, label_train, label_test = train_test_split(data_drop_deposite, label, test_size = 0.2, random_state = 50)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "7b8c8772-a2ec-4399-807e-73ccc4951dd1", + "_uuid": "4a48b73d67f4f57349d3dbc75c4d02cfeb1d82f9", + "collapsed": true, + "scrolled": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "UdYHY7sFjSOK", + "outputId": "4705992f-0371-443a-c24a-4bf5ba5a3495" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training score: 0.7285250307985217\n", + "Testing score: 0.7268248992386923\n" + ] + } + ], + "source": [ + "# Decision tree with depth = 2\n", + "dt2 = tree.DecisionTreeClassifier(random_state=1, max_depth=2)\n", + "dt2.fit(data_train, label_train)\n", + "dt2_score_train = dt2.score(data_train, label_train)\n", + "print(\"Training score: \",dt2_score_train)\n", + "dt2_score_test = dt2.score(data_test, label_test)\n", + "print(\"Testing score: \",dt2_score_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "14b771bd-b87e-4ac4-b3ec-64da8bbb7b68", + "_uuid": "c828f56ae83bf3eac076c3642ad26e51f660d643", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "k3muNWcxjSOK", + "outputId": "31726294-07b8-4669-ac77-0b2a481c13d7" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training score: 0.770411020271027\n", + "Testing score: 0.7572772055530677\n" + ] + } + ], + "source": [ + "# Decision tree with depth = 3\n", + "dt3 = tree.DecisionTreeClassifier(random_state=1, max_depth=3)\n", + "dt3.fit(data_train, label_train)\n", + "dt3_score_train = dt3.score(data_train, label_train)\n", +
"print(\"Training score: \",dt3_score_train)\n", + "dt3_score_test = dt3.score(data_test, label_test)\n", + "print(\"Testing score: \",dt3_score_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "60190973-9780-4b6a-9762-7e43d442db6b", + "_uuid": "f45e7da9e4052b837a3c363f03de439d16cca6d0", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Sz_KKDSgjSOK", + "outputId": "5a0ea4d7-d8eb-424c-eb59-d8a54cfb917f" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training score: 0.7885541494008288\n", + "Testing score: 0.774294670846395\n" + ] + } + ], + "source": [ + "# Decision tree with depth = 4\n", + "dt4 = tree.DecisionTreeClassifier(random_state=1, max_depth=4)\n", + "dt4.fit(data_train, label_train)\n", + "dt4_score_train = dt4.score(data_train, label_train)\n", + "print(\"Training score: \",dt4_score_train)\n", + "dt4_score_test = dt4.score(data_test, label_test)\n", + "print(\"Testing score: \",dt4_score_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "0f61abaa-9d1e-4fa7-83e7-23f11bdf90e8", + "_uuid": "a3b51e1daf328cc5157234c3e4bf0be5174ce62f", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HY5r1BWajSOK", + "outputId": "570eab5b-7d84-4e53-c372-dc1721bb0a61" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training score: 0.8080412140217269\n", + "Testing score: 0.7796686072548141\n" + ] + } + ], + "source": [ + "# Decision tree with depth = 6\n", + "dt6 = tree.DecisionTreeClassifier(random_state=1, max_depth=6)\n", + "dt6.fit(data_train, label_train)\n", + "dt6_score_train = dt6.score(data_train, label_train)\n", + "print(\"Training score: \",dt6_score_train)\n", + "dt6_score_test = dt6.score(data_test, label_test)\n", + "print(\"Testing score: \",dt6_score_test)" + ] + }, + { + "cell_type": "code", +
"execution_count": null, + "metadata": { + "_cell_guid": "32a4d8a7-b285-47c2-988c-8aa6a66933e2", + "_uuid": "cabee45b6dd75bd0aac571320e300c5d2c94c045", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "26B_n7VFjSOL", + "outputId": "57396820-fddd-4b09-95bd-257edb34b376" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training score: 1.0\n", + "Testing score: 0.7366771159874608\n" + ] + } + ], + "source": [ + "# Decision tree: To the full depth\n", + "# Seeded like the depth-limited trees so the fully grown tree (and its test score) is reproducible\n", + "dt1 = tree.DecisionTreeClassifier(random_state=1)\n", + "dt1.fit(data_train, label_train)\n", + "dt1_score_train = dt1.score(data_train, label_train)\n", + "print(\"Training score: \", dt1_score_train)\n", + "dt1_score_test = dt1.score(data_test, label_test)\n", + "print(\"Testing score: \", dt1_score_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "12a029ef-2d45-452c-927d-2f826b5f9a68", + "_uuid": "e1c1dad3c3e239f3f4b19d4b15da9949c2bd9ec2", + "id": "kgKWFZj9jSOL" + }, + "source": [ + "#### Compare Training and Testing scores for various tree depths used" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "6fb20c58-b5dc-4944-9b78-258d2f2e0da2", + "_uuid": "92eee54d59182aeed9b828fe2a57818ffcfcbb31", + "collapsed": true, + "scrolled": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2y2E0riKjSOL", + "outputId": "6f6266fb-4877-47d8-cb95-a6ac12a8fc48" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "depth Training score Testing score \n", + "----- -------------- ------------- \n", + "2 0.7285250307985217 0.7268248992386923\n", + "3 0.770411020271027 0.7572772055530677\n", + "4 0.7885541494008288 0.774294670846395\n", + "6 0.8080412140217269 0.7796686072548141\n", + "max 1.0 0.7366771159874608\n" + ] + } + ], + "source": [ + "print('{:10} {:20} {:20}'.format('depth', 'Training score','Testing score'))\n", + "print('{:10} {:20}
{:20}'.format('-----', '--------------','-------------'))\n", + "print('{:1} {:>25} {:>20}'.format(2, dt2_score_train, dt2_score_test))\n", + "print('{:1} {:>25} {:>20}'.format(3, dt3_score_train, dt3_score_test))\n", + "print('{:1} {:>25} {:>20}'.format(4, dt4_score_train, dt4_score_test))\n", + "print('{:1} {:>25} {:>20}'.format(6, dt6_score_train, dt6_score_test))\n", + "print('{:1} {:>23} {:>20}'.format(\"max\", dt1_score_train, dt1_score_test))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "a2ee959f-14f5-4236-8d7f-bfe781d37f49", + "_uuid": "9262f9c49fb675e6b7a107e66ea959868b9176b4", + "id": "LcDhekTOjSOL" + }, + "source": [ + "It can be seen that the higher the depth, the higher the training score, until the tree matches the training data set perfectly. However, the deeper the tree goes, the more it overfits the training data set, so there is no use in continuing to increase the tree depth. According to the above observations, a tree with a depth of 2 seems more reasonable, as both its training and testing scores are reasonably high." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "868f9549-40a9-45be-b91a-b1b185a882c8", + "_uuid": "051771a3c38a6c2880611ac28c74b3c513baf7bd", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/", + "height": 575 + }, + "id": "SWt3rXrbjSOM", + "outputId": "ed42e508-5259-41e3-ac46-04061826d37a" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "# Let's generate the decision tree for depth = 2\n", + "# Create a feature vector\n", + "# from sklearn.tree import export_graphviz\n", + "# features = bankcl.columns.tolist()\n", + "# features\n", + "plt.figure(figsize=(18, 10))\n", + "from sklearn import tree\n", + "tree.plot_tree(dt2, max_depth=2, filled=True)\n", + "plt.show()\n", + "# Uncomment below to generate the digraph Tree.\n", + "# tree.export_graphviz(dt2, out_file='tree_depth_2.dot', feature_names=features)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "11e301f0-fd01-4411-bb0d-4dd26c0333ce", + "_uuid": "7780912e57e3e5a3f862b3ba3006c64a7c084249", + "id": "evnOESiLjSOM" + }, + "source": [ + "Based on the decision tree results, it could be seen that higher the \"duration\", bank is able to sign up more people to term deposites." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "eafef089-0ec3-4915-8db1-783a06c77eec", + "_uuid": "ba242f2e1caef4966ebe0f0e7eec41dcc7daebc0", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "B4VqyzwyjSOM", + "outputId": "afe8915c-9509-4c0d-c121-d45eaf91dc62" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([0, 1])" + ] + }, + "metadata": {}, + "execution_count": 228 + } + ], + "source": [ + "# Two classes: 0 = not signed up, 1 = signed up\n", + "dt2.classes_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "2a4d1e56-ec88-471b-b530-57f9c420e447", + "_uuid": "e047c485745e224de5e9a76f656dca9f54760a86", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1dHt8-znjSON", + "outputId": "d0ec158b-0f9b-4640-9955-a0818de0c95b" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['age',\n", + " 'balance',\n", + " 
'duration',\n", + " 'campaign',\n", + " 'previous',\n", + " 'default_cat',\n", + " 'housing_cat',\n", + " 'loan_cat',\n", + " 'recent_pdays',\n", + " 'job_blue-collar',\n", + " 'job_entrepreneur',\n", + " 'job_other',\n", + " 'job_pink-collar',\n", + " 'job_self-employed',\n", + " 'job_technician',\n", + " 'job_white-collar',\n", + " 'marital_divorced',\n", + " 'marital_married',\n", + " 'marital_single',\n", + " 'education_primary',\n", + " 'education_secondary',\n", + " 'education_tertiary',\n", + " 'education_unknown',\n", + " 'poutcome_failure',\n", + " 'poutcome_success',\n", + " 'poutcome_unknown']" + ] + }, + "metadata": {}, + "execution_count": 229 + } + ], + "source": [ + "# Create a feature vector\n", + "features = data_drop_deposite.columns.tolist()\n", + "\n", + "features" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Hyperparameter Tuning\n", + "**GridSearch CV**" + ], + "metadata": { + "id": "SFP6SmeHdByv" + } + }, + { + "cell_type": "code", + "source": [ + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "def dtree_grid_search(X, y, nfolds, random_state=1):\n", + "    \"\"\"Return the best criterion/max_depth found by cross-validated grid search.\n", + "\n", + "    X, y: features and labels passed to GridSearchCV.fit.\n", + "    nfolds: number of cross-validation folds (cv).\n", + "    random_state: seed for the tree, so repeated searches give the same result.\n", + "    \"\"\"\n", + "    # Candidate values to test: both split criteria, depths 1..9\n", + "    param_grid = {'criterion': ['gini', 'entropy'], 'max_depth': np.arange(1, 10)}\n", + "    # Seeded decision tree model: ties between equally good splits are otherwise broken randomly\n", + "    dtree_model = DecisionTreeClassifier(random_state=random_state)\n", + "    # Use grid search to test all combinations\n", + "    dtree_gscv = GridSearchCV(dtree_model, param_grid, cv=nfolds)\n", + "    # Fit model to data\n", + "    dtree_gscv.fit(X, y)\n", + "    return dtree_gscv.best_params_" + ], + "metadata": { + "id": "bu3i4DhqdBKG" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Search on the training split only; the test split is kept for the final evaluation\n", + "best_params = dtree_grid_search(data_train, label_train, 5)\n", + "print(best_params)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GW5WRx_CeU9i", + "outputId": "0ccdb083-01ef-4241-e724-45e415d0ce46" + }, +
"execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{'criterion': 'entropy', 'max_depth': 6}\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Refit on the training split with the parameters selected by the grid search (seeded for reproducibility)\n", + "hDt = DecisionTreeClassifier(random_state=1, **best_params)\n", + "hDt.fit(data_train, label_train)\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Vu5yQFNFizwc", + "outputId": "0dfdb918-2605-4ff2-eb7b-c1f0d9bf94c4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "DecisionTreeClassifier(criterion='entropy', max_depth=6)" + ] + }, + "metadata": {}, + "execution_count": 242 + } + ] + }, + { + "cell_type": "code", + "source": [ + "hDt_score_train = hDt.score(data_train, label_train)\n", + "print(\"Training score: \", hDt_score_train)\n", + "hDt_score_test = hDt.score(data_test, label_test)\n", + "print(\"Testing score: \", hDt_score_test)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WGbnu9ibjODS", + "outputId": "7e2abb1b-3cf1-462d-cded-101cd680b7a5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training score: 0.8072572516519207\n", + "Testing score: 0.7814599193909538\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "fi = hDt.feature_importances_\n", + "\n", + "# Importances follow the order of the training columns, i.e. the order of `features`\n", + "for name, importance in zip(features, fi):\n", + "    print('{:.<20} {:3} '.format(name, importance))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ywLRzyXVj0VJ", + "outputId": "fd608351-94ca-43b3-e7c1-6d0c24b6cb03" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "age................. 0.0322194077868486 \n", + "balance............. 0.034645817621518214 \n", + "duration............ 0.5850070911505163 \n", + "campaign............
0.017328135495228898 \n", + "previous............ 0.015113434294132418 \n", + "default_cat......... 0.0 \n", + "housing_cat......... 0.103014050777686 \n", + "loan_cat............ 0.0019087945046288106 \n", + "recent_pdays........ 0.015399467520187043 \n", + "job_blue-collar..... 0.0 \n", + "job_entrepreneur.... 0.0 \n", + "job_other........... 0.0 \n", + "job_pink-collar..... 0.0 \n", + "job_self-employed... 0.0 \n", + "job_technician...... 0.0 \n", + "job_white-collar.... 0.0008448795174435656 \n", + "marital_divorced.... 0.0 \n", + "marital_married..... 0.004593648690515507 \n", + "marital_single...... 0.00452510545265586 \n", + "education_primary... 0.0 \n", + "education_secondary. 0.0 \n", + "education_tertiary.. 0.0 \n", + "education_unknown... 0.0 \n", + "poutcome_failure.... 0.0005306766764535086 \n", + "poutcome_success.... 0.1848694905121852 \n", + "poutcome_unknown.... 0.0 \n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "3eba0270-7d71-4254-b1b9-03d25eae3a23", + "_uuid": "caf2f2bfd1e3549d35359927dea802c3e8f3e40e", + "id": "cHUmtaLGjSON" + }, + "source": [ + "## Predictions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "7d43908a-0eef-4719-90a2-96e486ee3743", + "_uuid": "34edc756644878ec7c1cdbe10310485dc9b386a7", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "psb3agu8jSON", + "outputId": "13d255b8-9207-4baf-b5cb-1e7dbe3257c1" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mean duration : 371.99381831213043\n", + "Maximun duration: 3881\n", + "Minimum duration: 2\n" + ] + } + ], + "source": [ + "# According to feature importance results, most importtant feature is the \"Duration\"\n", + "# Let's calculte statistics on Duration\n", + "print(\"Mean duration : \", data_drop_deposite.duration.mean())\n", + "print(\"Maximun duration: \", data_drop_deposite.duration.max())\n", + "print(\"Minimum 
duration: \", data_drop_deposite.duration.min())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "44c20639-552b-4941-98ca-492aaaf987e3", + "_uuid": "f90cc3423d1135a1f0fb8f4850088935de2cc7d3", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "MtZ5I4SDjSOO", + "outputId": "997f4103-292d-40e6-bbee-27e92dd9c61e" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[[0.48515568 0.51484432]]\n", + "[1]\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/sklearn/base.py:451: UserWarning:\n", + "\n", + "X does not have valid feature names, but DecisionTreeClassifier was fitted with feature names\n", + "\n", + "/usr/local/lib/python3.7/dist-packages/sklearn/base.py:451: UserWarning:\n", + "\n", + "X does not have valid feature names, but DecisionTreeClassifier was fitted with feature names\n", + "\n" + ] + } + ], + "source": [ + "# Predict: Successful deposit with a call duration = 371 sec\n", + "# One-row frame with every feature 0 except duration and poutcome_success; keeping the\n", + "# training column names avoids sklearn's feature-name warning and any positional mix-up\n", + "sample_mean_duration = pd.DataFrame(0, index=[0], columns=data_drop_deposite.columns).assign(duration=371, poutcome_success=1)\n", + "\n", + "print(dt2.predict_proba(sample_mean_duration))\n", + "print(dt2.predict(sample_mean_duration))\n", + "# column 0: probability for class 0 (not signed for term deposit) & column 1: probability for class 1\n", + "# Probability of Successful deposit = 0.51484432" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "45ce9752-9a0e-49ae-8ae6-62f39e984fc1", + "_uuid": "01b7620cb265ebae4ec3289e20b49405324d25f0", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0gH4KGSJjSOO", + "outputId": "325b62c9-5e79-44c9-a6b5-ff1f6b97e58a" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[[0.01219512 0.98780488]]\n", + "[1]\n" + ] + }, + { + "output_type": "stream",
+ "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/sklearn/base.py:451: UserWarning:\n", + "\n", + "X does not have valid feature names, but DecisionTreeClassifier was fitted with feature names\n", + "\n", + "/usr/local/lib/python3.7/dist-packages/sklearn/base.py:451: UserWarning:\n", + "\n", + "X does not have valid feature names, but DecisionTreeClassifier was fitted with feature names\n", + "\n" + ] + } + ], + "source": [ + "# Predict: Successful deposit with a maximum call duration = 3881 sec\n", + "# Same one-row layout as the 371 sec example: all features 0 except duration and poutcome_success\n", + "sample_max_duration = pd.DataFrame(0, index=[0], columns=data_drop_deposite.columns).assign(duration=3881, poutcome_success=1)\n", + "\n", + "print(hDt.predict_proba(sample_max_duration))\n", + "print(hDt.predict(sample_max_duration))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "49cc5af7-e45e-465e-8496-f6e64ddffa06", + "_uuid": "0f0d54a2141b6a7906a93fbc0cf73b2cfe1dfecc", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "PUyWIDH_jSOO", + "outputId": "04fbe96f-4dc2-454b-e05b-cef66b84b76b" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "age 46.000000\n", + "balance 3354.000000\n", + "duration 522.000000\n", + "campaign 1.000000\n", + "previous 1.000000\n", + "default_cat 0.000000\n", + "housing_cat 1.000000\n", + "loan_cat 0.000000\n", + "recent_pdays 0.005747\n", + "job_blue-collar 0.000000\n", + "job_entrepreneur 0.000000\n", + "job_other 1.000000\n", + "job_pink-collar 0.000000\n", + "job_self-employed 0.000000\n", + "job_technician 0.000000\n", + "job_white-collar 0.000000\n", + "marital_divorced 1.000000\n", + "marital_married 0.000000\n", + "marital_single 0.000000\n", + "education_primary 0.000000\n", + "education_secondary 1.000000\n", + "education_tertiary 0.000000\n", + "education_unknown 0.000000\n", + "poutcome_failure 0.000000\n", + "poutcome_success 1.000000\n", + "poutcome_unknown 0.000000\n", +
"Name: 985, dtype: float64" + ] + }, + "metadata": {}, + "execution_count": 252 + } + ], + "source": [ + "# Get a row with poutcome_success = 1\n", + "#bank_with_dummies[(bank_with_dummies.poutcome_success == 1)]\n", + "data_drop_deposite.iloc[985]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "2e1045dd-ab80-4ebd-8daa-9b51c217f1cc", + "_uuid": "735a8e7498209fd60fd7ece2d610e0cd12793715", + "collapsed": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Tect9bKejSOP", + "outputId": "2cae1189-b832-4ae5-ecc1-d00bfaec85bb" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[[0.19295499 0.80704501]]\n", + "[[0. 1.]]\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/sklearn/base.py:451: UserWarning:\n", + "\n", + "X does not have valid feature names, but DecisionTreeClassifier was fitted with feature names\n", + "\n", + "/usr/local/lib/python3.7/dist-packages/sklearn/base.py:451: UserWarning:\n", + "\n", + "X does not have valid feature names, but DecisionTreeClassifier was fitted with feature names\n", + "\n" + ] + } + ], + "source": [ + "# Predict: Probability for above\n", + "# Select row 985 as a one-row DataFrame instead of retyping its (rounded) values by hand\n", + "row_985 = data_drop_deposite.iloc[[985]]\n", + "\n", + "print(dt2.predict_proba(row_985))\n", + "print(hDt.predict_proba(row_985))" + ] + }, + { + "cell_type": "code", + "source": [ + "# Make predictions on the test set\n", + "preds = hDt.predict(data_test)\n", + "\n", + "# Calculate accuracy\n", + "print(\"\\nAccuracy score: \\n{}\".format(metrics.accuracy_score(label_test, preds)))\n", + "\n", + "# Make predictions on the test set using predict_proba\n", + "probs = hDt.predict_proba(data_test)[:,1]\n", + "\n", + "# Calculate the AUC metric\n", + "print(\"\\nArea Under Curve:
\\n{}\".format(metrics.roc_auc_score(label_test, probs)))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JQnPz3Yum367", + "outputId": "7954ca5d-a856-4666-fbc3-24939172af55" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Accuracy score: \n", + "0.7814599193909538\n", + "\n", + "Area Under Curve: \n", + "0.8568148916662646\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "1a0663b5-bb57-4bee-83cb-3713cd1bfd49", + "_uuid": "7b6c6107fab5f61fedc06523adb4457b7521001d", + "collapsed": true, + "scrolled": true, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yuRpx2ajjSOP", + "outputId": "bb25e8a1-b2c8-446d-d411-7623bf8cd770" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Accuracy score: \n", + "0.7268248992386923\n", + "\n", + "Area Under Curve: \n", + "0.7880265888143609\n" + ] + } + ], + "source": [ + "# Make predictions on the test set\n", + "preds = dt2.predict(data_test)\n", + "\n", + "# Calculate accuracy\n", + "print(\"\\nAccuracy score: \\n{}\".format(metrics.accuracy_score(label_test, preds)))\n", + "\n", + "# Make predictions on the test set using predict_proba\n", + "probs = dt2.predict_proba(data_test)[:,1]\n", + "\n", + "# Calculate the AUC metric\n", + "print(\"\\nArea Under Curve: \\n{}\".format(metrics.roc_auc_score(label_test, probs)))" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Confusion matrix" + ], + "metadata": { + "id": "rvK-VZ-yqFMH" + } + }, + { + "cell_type": "code", + "source": [ + "from sklearn.metrics import confusion_matrix\n" + ], + "metadata": { + "id": "fdXY8FJTnNgW" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "y_predTrain = hDt.predict(data_train)\n", + "confusion_matrix(label_train, y_predTrain)" + ], + "metadata": { + "colab": { +
"base_uri": "https://localhost:8080/" + }, + "id": "oUe-35K8cbn_", + "outputId": "31bf8d4c-bf32-4380-e8c0-291f99945e9f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[3791, 909],\n", + " [ 812, 3417]])" + ] + }, + "metadata": {}, + "execution_count": 273 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ypredTest = hDt.predict(data_test)" + ], + "metadata": { + "id": "BbdruFUWoKlc" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "confusion_matrix(label_test, ypredTest)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OSe5OzoToliE", + "outputId": "be34cc53-fec6-489b-d110-92d26a1ff23d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[912, 261],\n", + " [227, 833]])" + ] + }, + "metadata": {}, + "execution_count": 275 + } + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + }, + "colab": { + "name": "Solution Decision Tree.ipynb", + "provenance": [], + "collapsed_sections": [ + "7tdtMFRCjSN_", + "QuXz1WcojSOA", + "Ti0iUujFjSOB", + "Lx4MVHu2jSOC", + "0gZW1UaujSOC", + "huql5gAajSOC", + "j3rFMDx0jSOD", + "o8-RUiIhjSOD", + "2xP7i2LBjSOF", + "Y_KjIwWtjSOG", + "fwi0C0L9jSOG", + "LBf4DIGEjSOI", + "PkC-G_rfjSOJ", + "kgKWFZj9jSOL", + "SFP6SmeHdByv", + "cHUmtaLGjSON", + "rvK-VZ-yqFMH" + ], + "include_colab_link": true + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file