From e799f1fc6c4b5121a0c77dd72c2e1d8ba6313d45 Mon Sep 17 00:00:00 2001
From: Awantik Das <awantikdas@gmail.com>
Date: Thu, 29 Nov 2018 17:17:35 +0530
Subject: [PATCH] Add files via upload

---
 ...dy 1 - Employee Attrition Prediction.ipynb | 1106 +++++++++++++++++
 ... 2 - Review Sentiment Classification.ipynb |  496 ++++++++
 Case Study Churn Prediction.ipynb             | 1064 ++++++++++++++++
 Case Study Clothe Identification.ipynb        |  216 ++++
 4 files changed, 2882 insertions(+)
 create mode 100644 Case Study 1 - Employee Attrition Prediction.ipynb
 create mode 100644 Case Study 2 - Review Sentiment Classification.ipynb
 create mode 100644 Case Study Churn Prediction.ipynb
 create mode 100644 Case Study Clothe Identification.ipynb
diff --git a/Case Study 1 - Employee Attrition Prediction.ipynb b/Case Study 1 - Employee Attrition Prediction.ipynb
new file mode 100644
index 0000000..5e529e0
--- /dev/null
+++ b/Case Study 1 - Employee Attrition Prediction.ipynb	
@@ -0,0 +1,1106 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "emp_data = pd.read_csv('https://raw.githubusercontent.com/zekelabs/data-science-complete-tutorial/master/Data/HR_comma_sep.csv.txt')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>satisfaction_level</th>\n",
+       "      <th>last_evaluation</th>\n",
+       "      <th>number_project</th>\n",
+       "      <th>average_montly_hours</th>\n",
+       "      <th>time_spend_company</th>\n",
+       "      <th>Work_accident</th>\n",
+       "      <th>left</th>\n",
+       "      <th>promotion_last_5years</th>\n",
+       "      <th>sales</th>\n",
+       "      <th>salary</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0.38</td>\n",
+       "      <td>0.53</td>\n",
+       "      <td>2</td>\n",
+       "      <td>157</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>sales</td>\n",
+       "      <td>low</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>0.80</td>\n",
+       "      <td>0.86</td>\n",
+       "      <td>5</td>\n",
+       "      <td>262</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>sales</td>\n",
+       "      <td>medium</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0.11</td>\n",
+       "      <td>0.88</td>\n",
+       "      <td>7</td>\n",
+       "      <td>272</td>\n",
+       "      <td>4</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>sales</td>\n",
+       "      <td>medium</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>0.72</td>\n",
+       "      <td>0.87</td>\n",
+       "      <td>5</td>\n",
+       "      <td>223</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>sales</td>\n",
+       "      <td>low</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0.37</td>\n",
+       "      <td>0.52</td>\n",
+       "      <td>2</td>\n",
+       "      <td>159</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>sales</td>\n",
+       "      <td>low</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   satisfaction_level  last_evaluation  number_project  average_montly_hours  \\\n",
+       "0                0.38             0.53               2                   157   \n",
+       "1                0.80             0.86               5                   262   \n",
+       "2                0.11             0.88               7                   272   \n",
+       "3                0.72             0.87               5                   223   \n",
+       "4                0.37             0.52               2                   159   \n",
+       "\n",
+       "   time_spend_company  Work_accident  left  promotion_last_5years  sales  \\\n",
+       "0                   3              0     1                      0  sales   \n",
+       "1                   6              0     1                      0  sales   \n",
+       "2                   4              0     1                      0  sales   \n",
+       "3                   5              0     1                      0  sales   \n",
+       "4                   3              0     1                      0  sales   \n",
+       "\n",
+       "   salary  \n",
+       "0     low  \n",
+       "1  medium  \n",
+       "2  medium  \n",
+       "3     low  \n",
+       "4     low  "
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "emp_data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "emp_data.rename(columns={'sales':'department'}, inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 83,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "satisfaction_level      -0.388375\n",
+       "last_evaluation          0.006567\n",
+       "number_project           0.023787\n",
+       "average_montly_hours     0.071287\n",
+       "time_spend_company       0.144822\n",
+       "Work_accident           -0.154622\n",
+       "left                     1.000000\n",
+       "promotion_last_5years   -0.061788\n",
+       "Name: left, dtype: float64"
+      ]
+     },
+     "execution_count": 83,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "emp_data.corr()['left']"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Selecting categorical columns & integer columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cat_emp_data = emp_data.select_dtypes('object')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "int_emp_data = emp_data.select_dtypes('int64')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Preprocessing Categorical Columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.preprocessing import LabelEncoder, OneHotEncoder"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "le = LabelEncoder()\n",
+    "ohe = OneHotEncoder()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LabelEncoder()"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "le.fit(cat_emp_data.department)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\awant\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
+      "  \"\"\"Entry point for launching an IPython kernel.\n"
+     ]
+    }
+   ],
+   "source": [
+    "cat_emp_data['department_tf'] = le.transform(cat_emp_data.department)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>department</th>\n",
+       "      <th>salary</th>\n",
+       "      <th>department_tf</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>sales</td>\n",
+       "      <td>low</td>\n",
+       "      <td>7</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>sales</td>\n",
+       "      <td>medium</td>\n",
+       "      <td>7</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>sales</td>\n",
+       "      <td>medium</td>\n",
+       "      <td>7</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>sales</td>\n",
+       "      <td>low</td>\n",
+       "      <td>7</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>sales</td>\n",
+       "      <td>low</td>\n",
+       "      <td>7</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  department  salary  department_tf\n",
+       "0      sales     low              7\n",
+       "1      sales  medium              7\n",
+       "2      sales  medium              7\n",
+       "3      sales     low              7\n",
+       "4      sales     low              7"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cat_emp_data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array(['IT', 'RandD', 'accounting', 'hr', 'management', 'marketing',\n",
+       "       'product_mng', 'sales', 'support', 'technical'], dtype=object)"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "le.classes_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array(['sales'], dtype=object)"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "le.inverse_transform([7])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\awant\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:363: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n",
+      "If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n",
+      "In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n",
+      "  warnings.warn(msg, FutureWarning)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "OneHotEncoder(categorical_features=None, categories=None,\n",
+       "       dtype=<class 'numpy.float64'>, handle_unknown='error',\n",
+       "       n_values='auto', sparse=True)"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ohe.fit(cat_emp_data[['department_tf']])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "department_tf = ohe.transform(cat_emp_data[['department_tf']]).toarray()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.preprocessing import FunctionTransformer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def func(x):\n",
+    "    def mapping(d):\n",
+    "        if d == 'low':\n",
+    "            return 1\n",
+    "        elif d == 'medium':\n",
+    "            return 2\n",
+    "        else:\n",
+    "            return 3\n",
+    "    return x.map( mapping )\n",
+    "        \n",
+    "ft = FunctionTransformer(func, validate=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\awant\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
+      "  \"\"\"Entry point for launching an IPython kernel.\n"
+     ]
+    }
+   ],
+   "source": [
+    "cat_emp_data['salary_tf'] = ft.transform(cat_emp_data.salary)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Preprocessing Number Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>number_project</th>\n",
+       "      <th>average_montly_hours</th>\n",
+       "      <th>time_spend_company</th>\n",
+       "      <th>Work_accident</th>\n",
+       "      <th>left</th>\n",
+       "      <th>promotion_last_5years</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2</td>\n",
+       "      <td>157</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>5</td>\n",
+       "      <td>262</td>\n",
+       "      <td>6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>7</td>\n",
+       "      <td>272</td>\n",
+       "      <td>4</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>5</td>\n",
+       "      <td>223</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>2</td>\n",
+       "      <td>159</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   number_project  average_montly_hours  time_spend_company  Work_accident  \\\n",
+       "0               2                   157                   3              0   \n",
+       "1               5                   262                   6              0   \n",
+       "2               7                   272                   4              0   \n",
+       "3               5                   223                   5              0   \n",
+       "4               2                   159                   3              0   \n",
+       "\n",
+       "   left  promotion_last_5years  \n",
+       "0     1                      0  \n",
+       "1     1                      0  \n",
+       "2     1                      0  \n",
+       "3     1                      0  \n",
+       "4     1                      0  "
+      ]
+     },
+     "execution_count": 34,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "int_emp_data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\awant\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame\n",
+      "\n",
+      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
+      "  \"\"\"Entry point for launching an IPython kernel.\n"
+     ]
+    }
+   ],
+   "source": [
+    "int_emp_data.drop('left',axis=1, inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.preprocessing import MinMaxScaler"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mms = MinMaxScaler()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\awant\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\data.py:323: DataConversionWarning: Data with input dtype int64 were all converted to float64 by MinMaxScaler.\n",
+      "  return self.partial_fit(X, y)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "MinMaxScaler(copy=True, feature_range=(0, 1))"
+      ]
+     },
+     "execution_count": 42,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mms.fit(int_emp_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "int_tf = mms.transform(int_emp_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[0.        , 0.28504673, 0.125     , 0.        , 0.        ],\n",
+       "       [0.6       , 0.77570093, 0.5       , 0.        , 0.        ],\n",
+       "       [1.        , 0.82242991, 0.25      , 0.        , 0.        ],\n",
+       "       ...,\n",
+       "       [0.        , 0.21962617, 0.125     , 0.        , 0.        ],\n",
+       "       [0.8       , 0.85981308, 0.25      , 0.        , 0.        ],\n",
+       "       [0.        , 0.28971963, 0.125     , 0.        , 0.        ]])"
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "int_tf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "float_tf = emp_data[['satisfaction_level','last_evaluation']].values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(14999,)"
+      ]
+     },
+     "execution_count": 60,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cat_emp_data['salary_tf'].values.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(14999, 5)"
+      ]
+     },
+     "execution_count": 62,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "int_tf.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],\n",
+       "       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]])"
+      ]
+     },
+     "execution_count": 72,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "department_tf[:2]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Joining the data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "salary_tf = cat_emp_data['salary_tf'].values.reshape(-1,1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "feature_data = np.hstack([department_tf,int_tf,float_tf,salary_tf])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "target_data = emp_data.left"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Split data into two parts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import train_test_split\n",
+    "trainX, testX, trainY, testY = train_test_split(feature_data,target_data)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Model Training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.ensemble import RandomForestClassifier"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 75,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "lr = LogisticRegression()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 76,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\awant\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "  FutureWarning)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
+       "          intercept_scaling=1, max_iter=100, multi_class='warn',\n",
+       "          n_jobs=None, penalty='l2', random_state=None, solver='warn',\n",
+       "          tol=0.0001, verbose=0, warm_start=False)"
+      ]
+     },
+     "execution_count": 76,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "lr.fit(trainX,trainY)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 77,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rf = RandomForestClassifier()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 78,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\awant\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\forest.py:248: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
+      "  \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
+       "            max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
+       "            min_impurity_decrease=0.0, min_impurity_split=None,\n",
+       "            min_samples_leaf=1, min_samples_split=2,\n",
+       "            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,\n",
+       "            oob_score=False, random_state=None, verbose=0,\n",
+       "            warm_start=False)"
+      ]
+     },
+     "execution_count": 78,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "rf.fit(trainX,trainY)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Model Validation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8"
+      ]
+     },
+     "execution_count": 79,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "lr.score(testX,testY)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9898666666666667"
+      ]
+     },
+     "execution_count": 80,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "rf.score(testX,testY)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 89,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.metrics import recall_score,precision_score, f1_score, classification_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 85,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred = rf.predict(testX)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 86,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9904191616766467"
+      ]
+     },
+     "execution_count": 86,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "precision_score(y_pred=pred, y_true=testY)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 88,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9775413711583923"
+      ]
+     },
+     "execution_count": 88,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "f1_score(y_pred=pred, y_true=testY)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 91,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "           0       0.99      1.00      0.99      2893\n",
+      "           1       0.99      0.96      0.98       857\n",
+      "\n",
+      "   micro avg       0.99      0.99      0.99      3750\n",
+      "   macro avg       0.99      0.98      0.99      3750\n",
+      "weighted avg       0.99      0.99      0.99      3750\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print (classification_report(y_pred=pred, y_true=testY))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Case Study 2 - Review Sentiment Classification.ipynb b/Case Study 2 - Review Sentiment Classification.ipynb
new file mode 100644
index 0000000..78f1746
--- /dev/null
+++ b/Case Study 2 - Review Sentiment Classification.ipynb	
@@ -0,0 +1,496 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "review_data = pd.read_csv('C:/Users/awant/Documents/Reviews.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 568454 entries, 0 to 568453\n",
+      "Data columns (total 10 columns):\n",
+      "Id                        568454 non-null int64\n",
+      "ProductId                 568454 non-null object\n",
+      "UserId                    568454 non-null object\n",
+      "ProfileName               568438 non-null object\n",
+      "HelpfulnessNumerator      568454 non-null int64\n",
+      "HelpfulnessDenominator    568454 non-null int64\n",
+      "Score                     568454 non-null int64\n",
+      "Time                      568454 non-null int64\n",
+      "Summary                   568427 non-null object\n",
+      "Text                      568454 non-null object\n",
+      "dtypes: int64(5), object(5)\n",
+      "memory usage: 43.4+ MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "review_data.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "review_data = review_data.sample(5000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "review_data = review_data[['Text','Score']]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Text</th>\n",
+       "      <th>Score</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>22098</th>\n",
+       "      <td>We've had this for almost a week, and the dog ...</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>295111</th>\n",
+       "      <td>I just have to put my two cents in on this tea...</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>125547</th>\n",
+       "      <td>The Hanover Pumpernickle &amp; Onion Pretzels are ...</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>350934</th>\n",
+       "      <td>January 3, 2011&lt;br /&gt;&lt;br /&gt;I purchased these p...</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>502462</th>\n",
+       "      <td>I received these and added them to the collect...</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                                     Text  Score\n",
+       "22098   We've had this for almost a week, and the dog ...      4\n",
+       "295111  I just have to put my two cents in on this tea...      5\n",
+       "125547  The Hanover Pumpernickle & Onion Pretzels are ...      5\n",
+       "350934  January 3, 2011<br /><br />I purchased these p...      4\n",
+       "502462  I received these and added them to the collect...      5"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "review_data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "review_data = review_data[review_data.Score != 3]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "review_data['Sentiment'] = review_data.Score.map(lambda x: 0 if x < 3 else 1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Text</th>\n",
+       "      <th>Score</th>\n",
+       "      <th>Sentiment</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>22098</th>\n",
+       "      <td>We've had this for almost a week, and the dog ...</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                                    Text  Score  Sentiment\n",
+       "22098  We've had this for almost a week, and the dog ...      4          1"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "review_data.head(1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.feature_extraction.text import CountVectorizer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cv = CountVectorizer(stop_words='english')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.naive_bayes import MultinomialNB"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mnb = MultinomialNB()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Pipeline Creation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.feature_selection import SelectKBest"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "word_selector = SelectKBest(k=2000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.pipeline import make_pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "review_pipeline = make_pipeline(cv, word_selector, mnb)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[('countvectorizer',\n",
+       "  CountVectorizer(analyzer='word', binary=False, decode_error='strict',\n",
+       "          dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',\n",
+       "          lowercase=True, max_df=1.0, max_features=None, min_df=1,\n",
+       "          ngram_range=(1, 1), preprocessor=None, stop_words='english',\n",
+       "          strip_accents=None, token_pattern='(?u)\\\\b\\\\w\\\\w+\\\\b',\n",
+       "          tokenizer=None, vocabulary=None)),\n",
+       " ('selectkbest',\n",
+       "  SelectKBest(k=2000, score_func=<function f_classif at 0x000001C7C8242BF8>)),\n",
+       " ('multinomialnb', MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))]"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "review_pipeline.steps"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Splitting data into train & test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import train_test_split\n",
+    "trainX,testX, trainY,testY = train_test_split(review_data.Text, review_data.Sentiment)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Model Training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Pipeline(memory=None,\n",
+       "     steps=[('countvectorizer', CountVectorizer(analyzer='word', binary=False, decode_error='strict',\n",
+       "        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',\n",
+       "        lowercase=True, max_df=1.0, max_features=None, min_df=1,\n",
+       "        ngram_range=(1, 1), preprocessor=None, stop_words='english...x000001C7C8242BF8>)), ('multinomialnb', MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))])"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "review_pipeline.fit(trainX,trainY)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Model Validation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8871527777777778"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "review_pipeline.score(testX,testY)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([0], dtype=int64)"
+      ]
+     },
+     "execution_count": 34,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "review_pipeline.predict(['Foul Bad Worse'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([1], dtype=int64)"
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "review_pipeline.predict(['Good food here'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1    3837\n",
+       "0     769\n",
+       "Name: Sentiment, dtype: int64"
+      ]
+     },
+     "execution_count": 36,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "review_data.Sentiment.value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Case Study Churn Prediction.ipynb b/Case Study Churn Prediction.ipynb
new file mode 100644
index 0000000..39fa1a1
--- /dev/null
+++ b/Case Study Churn Prediction.ipynb	
@@ -0,0 +1,1064 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "churn_data = pd.read_csv('https://raw.githubusercontent.com/zekelabs/data-science-complete-tutorial/master/Data/churn.csv.txt',\n",
+    "                         parse_dates=['last_trip_date','signup_date'] )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 50000 entries, 0 to 49999\n",
+      "Data columns (total 12 columns):\n",
+      "avg_dist                  50000 non-null float64\n",
+      "avg_rating_by_driver      49799 non-null float64\n",
+      "avg_rating_of_driver      41878 non-null float64\n",
+      "avg_surge                 50000 non-null float64\n",
+      "city                      50000 non-null object\n",
+      "last_trip_date            50000 non-null datetime64[ns]\n",
+      "phone                     49604 non-null object\n",
+      "signup_date               50000 non-null datetime64[ns]\n",
+      "surge_pct                 50000 non-null float64\n",
+      "trips_in_first_30_days    50000 non-null int64\n",
+      "luxury_car_user           50000 non-null bool\n",
+      "weekday_pct               50000 non-null float64\n",
+      "dtypes: bool(1), datetime64[ns](2), float64(6), int64(1), object(2)\n",
+      "memory usage: 4.2+ MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "churn_data.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>avg_dist</th>\n",
+       "      <th>avg_rating_by_driver</th>\n",
+       "      <th>avg_rating_of_driver</th>\n",
+       "      <th>avg_surge</th>\n",
+       "      <th>city</th>\n",
+       "      <th>last_trip_date</th>\n",
+       "      <th>phone</th>\n",
+       "      <th>signup_date</th>\n",
+       "      <th>surge_pct</th>\n",
+       "      <th>trips_in_first_30_days</th>\n",
+       "      <th>luxury_car_user</th>\n",
+       "      <th>weekday_pct</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>3.67</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.7</td>\n",
+       "      <td>1.10</td>\n",
+       "      <td>King's Landing</td>\n",
+       "      <td>2014-06-17</td>\n",
+       "      <td>iPhone</td>\n",
+       "      <td>2014-01-25</td>\n",
+       "      <td>15.4</td>\n",
+       "      <td>4</td>\n",
+       "      <td>True</td>\n",
+       "      <td>46.2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>8.26</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>1.00</td>\n",
+       "      <td>Astapor</td>\n",
+       "      <td>2014-05-05</td>\n",
+       "      <td>Android</td>\n",
+       "      <td>2014-01-29</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>False</td>\n",
+       "      <td>50.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0.77</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.3</td>\n",
+       "      <td>1.00</td>\n",
+       "      <td>Astapor</td>\n",
+       "      <td>2014-01-07</td>\n",
+       "      <td>iPhone</td>\n",
+       "      <td>2014-01-06</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>False</td>\n",
+       "      <td>100.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>2.36</td>\n",
+       "      <td>4.9</td>\n",
+       "      <td>4.6</td>\n",
+       "      <td>1.14</td>\n",
+       "      <td>King's Landing</td>\n",
+       "      <td>2014-06-29</td>\n",
+       "      <td>iPhone</td>\n",
+       "      <td>2014-01-10</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>9</td>\n",
+       "      <td>True</td>\n",
+       "      <td>80.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>3.13</td>\n",
+       "      <td>4.9</td>\n",
+       "      <td>4.4</td>\n",
+       "      <td>1.19</td>\n",
+       "      <td>Winterfell</td>\n",
+       "      <td>2014-03-15</td>\n",
+       "      <td>Android</td>\n",
+       "      <td>2014-01-27</td>\n",
+       "      <td>11.8</td>\n",
+       "      <td>14</td>\n",
+       "      <td>False</td>\n",
+       "      <td>82.4</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   avg_dist  avg_rating_by_driver  avg_rating_of_driver  avg_surge  \\\n",
+       "0      3.67                   5.0                   4.7       1.10   \n",
+       "1      8.26                   5.0                   5.0       1.00   \n",
+       "2      0.77                   5.0                   4.3       1.00   \n",
+       "3      2.36                   4.9                   4.6       1.14   \n",
+       "4      3.13                   4.9                   4.4       1.19   \n",
+       "\n",
+       "             city last_trip_date    phone signup_date  surge_pct  \\\n",
+       "0  King's Landing     2014-06-17   iPhone  2014-01-25       15.4   \n",
+       "1         Astapor     2014-05-05  Android  2014-01-29        0.0   \n",
+       "2         Astapor     2014-01-07   iPhone  2014-01-06        0.0   \n",
+       "3  King's Landing     2014-06-29   iPhone  2014-01-10       20.0   \n",
+       "4      Winterfell     2014-03-15  Android  2014-01-27       11.8   \n",
+       "\n",
+       "   trips_in_first_30_days  luxury_car_user  weekday_pct  \n",
+       "0                       4             True         46.2  \n",
+       "1                       0            False         50.0  \n",
+       "2                       3            False        100.0  \n",
+       "3                       9             True         80.0  \n",
+       "4                      14            False         82.4  "
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "churn_data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Timestamp('2014-07-01 00:00:00')"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "churn_data.last_trip_date.max()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Timestamp('2014-01-01 00:00:00')"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "churn_data.last_trip_date.min()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import datetime\n",
+    "cutoff = churn_data.last_trip_date.max() - datetime.timedelta(30,0,0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Timestamp('2014-06-01 00:00:00')"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cutoff"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "churn_data['churn'] = churn_data.last_trip_date.map(lambda d: 1 if d > cutoff else 0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>avg_dist</th>\n",
+       "      <th>avg_rating_by_driver</th>\n",
+       "      <th>avg_rating_of_driver</th>\n",
+       "      <th>avg_surge</th>\n",
+       "      <th>city</th>\n",
+       "      <th>last_trip_date</th>\n",
+       "      <th>phone</th>\n",
+       "      <th>signup_date</th>\n",
+       "      <th>surge_pct</th>\n",
+       "      <th>trips_in_first_30_days</th>\n",
+       "      <th>luxury_car_user</th>\n",
+       "      <th>weekday_pct</th>\n",
+       "      <th>churn</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>3.67</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.7</td>\n",
+       "      <td>1.10</td>\n",
+       "      <td>King's Landing</td>\n",
+       "      <td>2014-06-17</td>\n",
+       "      <td>iPhone</td>\n",
+       "      <td>2014-01-25</td>\n",
+       "      <td>15.4</td>\n",
+       "      <td>4</td>\n",
+       "      <td>True</td>\n",
+       "      <td>46.2</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>8.26</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>1.00</td>\n",
+       "      <td>Astapor</td>\n",
+       "      <td>2014-05-05</td>\n",
+       "      <td>Android</td>\n",
+       "      <td>2014-01-29</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>False</td>\n",
+       "      <td>50.0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0.77</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>4.3</td>\n",
+       "      <td>1.00</td>\n",
+       "      <td>Astapor</td>\n",
+       "      <td>2014-01-07</td>\n",
+       "      <td>iPhone</td>\n",
+       "      <td>2014-01-06</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>False</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>2.36</td>\n",
+       "      <td>4.9</td>\n",
+       "      <td>4.6</td>\n",
+       "      <td>1.14</td>\n",
+       "      <td>King's Landing</td>\n",
+       "      <td>2014-06-29</td>\n",
+       "      <td>iPhone</td>\n",
+       "      <td>2014-01-10</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>9</td>\n",
+       "      <td>True</td>\n",
+       "      <td>80.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>3.13</td>\n",
+       "      <td>4.9</td>\n",
+       "      <td>4.4</td>\n",
+       "      <td>1.19</td>\n",
+       "      <td>Winterfell</td>\n",
+       "      <td>2014-03-15</td>\n",
+       "      <td>Android</td>\n",
+       "      <td>2014-01-27</td>\n",
+       "      <td>11.8</td>\n",
+       "      <td>14</td>\n",
+       "      <td>False</td>\n",
+       "      <td>82.4</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   avg_dist  avg_rating_by_driver  avg_rating_of_driver  avg_surge  \\\n",
+       "0      3.67                   5.0                   4.7       1.10   \n",
+       "1      8.26                   5.0                   5.0       1.00   \n",
+       "2      0.77                   5.0                   4.3       1.00   \n",
+       "3      2.36                   4.9                   4.6       1.14   \n",
+       "4      3.13                   4.9                   4.4       1.19   \n",
+       "\n",
+       "             city last_trip_date    phone signup_date  surge_pct  \\\n",
+       "0  King's Landing     2014-06-17   iPhone  2014-01-25       15.4   \n",
+       "1         Astapor     2014-05-05  Android  2014-01-29        0.0   \n",
+       "2         Astapor     2014-01-07   iPhone  2014-01-06        0.0   \n",
+       "3  King's Landing     2014-06-29   iPhone  2014-01-10       20.0   \n",
+       "4      Winterfell     2014-03-15  Android  2014-01-27       11.8   \n",
+       "\n",
+       "   trips_in_first_30_days  luxury_car_user  weekday_pct  churn  \n",
+       "0                       4             True         46.2      1  \n",
+       "1                       0            False         50.0      0  \n",
+       "2                       3            False        100.0      0  \n",
+       "3                       9             True         80.0      1  \n",
+       "4                      14            False         82.4      0  "
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "churn_data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cat_cols = list(churn_data.select_dtypes('object').columns)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['city', 'phone']"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cat_cols"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_cols = list(churn_data.select_dtypes('float64').columns)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_cols.append('trips_in_first_30_days')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['avg_dist',\n",
+       " 'avg_rating_by_driver',\n",
+       " 'avg_rating_of_driver',\n",
+       " 'avg_surge',\n",
+       " 'surge_pct',\n",
+       " 'weekday_pct',\n",
+       " 'trips_in_first_30_days']"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "num_cols"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Pipeline Creation for categorical columns & numerical columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.pipeline import make_pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.impute import SimpleImputer\n",
+    "from sklearn.preprocessing import OneHotEncoder, MinMaxScaler"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cat_pipeline = make_pipeline(SimpleImputer(strategy='constant', fill_value='missing'),\n",
+    "                            OneHotEncoder())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cat_data_tf = cat_pipeline.fit_transform(churn_data[cat_cols]).toarray()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_pipeline = make_pipeline(SimpleImputer(strategy='median'),\n",
+    "                             MinMaxScaler())                            "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_data_tf = num_pipeline.fit_transform(churn_data[num_cols])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\awant\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead\n",
+      "  \"\"\"Entry point for launching an IPython kernel.\n"
+     ]
+    }
+   ],
+   "source": [
+    "feature_data = np.hstack([cat_data_tf, num_data_tf, churn_data.luxury_car_user.reshape(-1,1)])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(50000, 14)"
+      ]
+     },
+     "execution_count": 49,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "feature_data.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "target_data = churn_data.churn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.feature_selection import SelectKBest\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.model_selection import train_test_split"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "final_pipeline = make_pipeline(SelectKBest(k=8), RandomForestClassifier(n_estimators=20))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trainX, testX, trainY, testY = train_test_split(feature_data,target_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Pipeline(memory=None,\n",
+       "     steps=[('selectkbest', SelectKBest(k=8, score_func=<function f_classif at 0x000001F05973DB70>)), ('randomforestclassifier', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
+       "            max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
+       "            min_impurity_decre...obs=None,\n",
+       "            oob_score=False, random_state=None, verbose=0,\n",
+       "            warm_start=False))])"
+      ]
+     },
+     "execution_count": 47,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "final_pipeline.fit(trainX,trainY)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.66072"
+      ]
+     },
+     "execution_count": 48,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "final_pipeline.score(testX,testY)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import GridSearchCV"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "params = {\n",
+    "    'selectkbest__k':[5,8,12],\n",
+    "    'randomforestclassifier__n_estimators':[10,30,50]\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "grid_model = GridSearchCV(final_pipeline,param_grid=params, cv=5, n_jobs=-1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "GridSearchCV(cv=5, error_score='raise-deprecating',\n",
+       "       estimator=Pipeline(memory=None,\n",
+       "     steps=[('selectkbest', SelectKBest(k=8, score_func=<function f_classif at 0x000001F05973DB70>)), ('randomforestclassifier', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
+       "            max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
+       "            min_impurity_decre...obs=None,\n",
+       "            oob_score=False, random_state=None, verbose=0,\n",
+       "            warm_start=False))]),\n",
+       "       fit_params=None, iid='warn', n_jobs=-1,\n",
+       "       param_grid={'selectkbest__k': [5, 8, 12], 'randomforestclassifier__n_estimators': [10, 30, 50]},\n",
+       "       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n",
+       "       scoring=None, verbose=0)"
+      ]
+     },
+     "execution_count": 53,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "grid_model.fit(trainX,trainY)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'randomforestclassifier__n_estimators': 50, 'selectkbest__k': 12}"
+      ]
+     },
+     "execution_count": 54,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "grid_model.best_params_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.7619733333333333"
+      ]
+     },
+     "execution_count": 55,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "grid_model.best_score_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.7592"
+      ]
+     },
+     "execution_count": 56,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "grid_model.score(testX,testY)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.compose import ColumnTransformer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['avg_dist',\n",
+       " 'avg_rating_by_driver',\n",
+       " 'avg_rating_of_driver',\n",
+       " 'avg_surge',\n",
+       " 'surge_pct',\n",
+       " 'weekday_pct',\n",
+       " 'trips_in_first_30_days']"
+      ]
+     },
+     "execution_count": 74,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "num_cols"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Using column transformer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 95,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "preprocessor = ColumnTransformer(\n",
+    "    transformers=[\n",
+    "        ('num', num_pipeline, num_cols),\n",
+    "        ('cat', cat_pipeline, cat_cols)])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 96,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.pipeline import Pipeline\n",
+    "pipeline = Pipeline(steps=[('preprocessor',preprocessor),\n",
+    "                           ('selectkbest', SelectKBest(k=8)),\n",
+    "                           ('classifier',RandomForestClassifier(n_estimators=20))])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 97,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#pipeline = make_pipeline(preprocessor, SelectKBest(k=8), RandomForestClassifier(n_estimators=20))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 98,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Pipeline(memory=None,\n",
+       "     steps=[('simpleimputer', SimpleImputer(copy=True, fill_value=None, missing_values=nan,\n",
+       "       strategy='median', verbose=0)), ('minmaxscaler', MinMaxScaler(copy=True, feature_range=(0, 1)))])"
+      ]
+     },
+     "execution_count": 98,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "num_pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 99,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "params = {\n",
+    "    'preprocessor__num__simpleimputer__strategy':['mean','median'],\n",
+    "    'selectkbest__k':[12,13],\n",
+    "    'classifier__n_estimators':[50,70]\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 100,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "grid_model = GridSearchCV(pipeline, param_grid=params,cv=5, n_jobs=-1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 101,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "feature_data = churn_data.drop('churn',axis=1)\n",
+    "target_data = churn_data.churn\n",
+    "trainX, testX, trainY, testY = train_test_split(feature_data,target_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 102,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "GridSearchCV(cv=5, error_score='raise-deprecating',\n",
+       "       estimator=Pipeline(memory=None,\n",
+       "     steps=[('preprocessor', ColumnTransformer(n_jobs=None, remainder='drop', sparse_threshold=0.3,\n",
+       "         transformer_weights=None,\n",
+       "         transformers=[('num', Pipeline(memory=None,\n",
+       "     steps=[('simpleimputer', SimpleImputer(copy=True, fill_value=None, missing_values=nan,\n",
+       "       strategy='median',...obs=None,\n",
+       "            oob_score=False, random_state=None, verbose=0,\n",
+       "            warm_start=False))]),\n",
+       "       fit_params=None, iid='warn', n_jobs=-1,\n",
+       "       param_grid={'preprocessor__num__simpleimputer__strategy': ['mean', 'median'], 'selectkbest__k': [12, 13], 'classifier__n_estimators': [50, 70]},\n",
+       "       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n",
+       "       scoring=None, verbose=0)"
+      ]
+     },
+     "execution_count": 102,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "grid_model.fit(trainX,trainY)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 103,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.7528"
+      ]
+     },
+     "execution_count": 103,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "grid_model.best_score_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 104,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'classifier__n_estimators': 70,\n",
+       " 'preprocessor__num__simpleimputer__strategy': 'mean',\n",
+       " 'selectkbest__k': 12}"
+      ]
+     },
+     "execution_count": 104,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "grid_model.best_params_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Case Study Clothe Identification.ipynb b/Case Study Clothe Identification.ipynb
new file mode 100644
index 0000000..6b434d8
--- /dev/null
+++ b/Case Study Clothe Identification.ipynb	
@@ -0,0 +1,216 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fashion_mnist = pd.read_csv('Data/fashion-mnist_train.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(60000, 785)"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "fashion_mnist.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "img = fashion_mnist[1:2].values[:,1:].reshape(28,28)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<matplotlib.image.AxesImage at 0x2045b76b4a8>"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAEPBJREFUeJzt3XuM1eWdx/HPV0BBLiLKLUKXbgURiVJDcLWNlxCqS0yEP2rqX9TdSGM0scY/JCamJhuTZrPt7kZjExpJMba2NWI1uq4WgqVew4go3uoFUEdwhosXUEAYvvvH/NhMcX7f53Buv4PP+5WQOXO+85zzcGY+c86Z7+/5PebuApCfE6qeAIBqEH4gU4QfyBThBzJF+IFMEX4gU4QfyBThBzJF+IFMDW3nnZkZhxMCLebuVsvXNfTMb2ZXmNnfzOxdM1vWyG0BaC+r99h+Mxsi6W1JCyR1S1ov6Rp3fyMYwzM/0GLteOafJ+ldd9/s7l9J+r2kqxq4PQBt1Ej4z5D04YDPu4vr/o6ZLTWzLjPrauC+ADRZI3/wG+ylxdde1rv7cknLJV72A52kkWf+bklTB3w+RdK2xqYDoF0aCf96SdPN7NtmdqKkH0l6tDnTAtBqdb/sd/dDZnajpCclDZG0wt1fb9rMALRU3a2+uu6M9/xAy7XlIB8Axy/CD2SK8AOZIvxApgg/kCnCD2SK8AOZIvxApgg/kCnCD2SK8AOZIvxApgg/kCnCD2SK8AOZIvxApgg/kCnCD2SK8AOZIvxApgg/kCnCD2SK8AOZIvxApgg/kCnCD2SK8AOZIvxApgg/kKm6t+iWJDPbKmmPpD5Jh9x9bjMmheYxq2nD1lLt3MX5aIsXLw7rzzzzTFjfsWNHaS31uKT+342O7wQNhb9wmbvvbMLtAGgjXvYDmWo0/C7pKTN7ycyWNmNCANqj0Zf933P3bWY2QdKfzewtd1838AuKXwr8YgA6TEPP/O6+rfjYK+lhSfMG+Zrl7j6XPwYCnaXu8JvZSDMbfeSypB9Ieq1ZEwPQWo287J8o6eGi5TFU0u/c/X+bMisALVd3+N19s6TzmjgX1CnqOXdyv3nMmDFh/dZbbw3rW7ZsCetRn7/Rx6WTH9da0eoDMkX4gUwRfiBThB/IFOEHMkX4gUxZO1sWZnb890fqcMIJ8e/YVn4POnlp6n333RfWJ0yYENZ37doV1m+66abS2s6d8ULURpdCp77n0e339fWFY1PfE3evafI88wOZIvxApgg/kCnCD2SK8AOZIvxApgg/kKlmnL0XCYcPH27p7Uc941S/OTW3RscvW7astDZ+/Phw7AcffBDW586NTw41atSo0lqqzz90aGPROHjwYEPj24FnfiBThB/IFOEHMkX4gUwRfiBThB/IFOEHMkWf/xsg6vOn+vBDhgwJ66m15VdeeWVYv+GGG0prjz32WDh27969YX3jxo1hfevWrWE90uo+/WWXXVZae+ONN8KxPT09TZkDz/xApgg/kCnCD2SK8AOZIvxApgg/kCnCD2Qq2ec3sxWSrpTU6+6zi+vGSfqDpGmStkq62t0/ad00j2+Nnhs/Nb6R8wWk+vgXXHBBWL/77rvD+tq1a0tr+/fvD8fu3r07rEe9ciles3///feHY++8886wnjqXwNixY8P6ddddV1pbuHBhOLZZannm/42kK466bpmkNe4+XdKa4nMAx5Fk+N19naSjfwVfJWllcXmlpEVNnheAFqv3Pf9Ed98uScXHeF8lAB2n5cf2m9lSSUtbfT8Ajk29z/w9ZjZZkoqPvWVf6O7L3X2uu8d/IQHQVvWG/1FJS4rLSyQ90pzpAGiXZPjN7AFJz0s6y8y6zexfJf1c0gIze0fSguJzAMcRa+X+61+7M7P23dlxpNHjABoxa9assP7kk0+G9TVr1oT1PXv2lNZ6e0vfLUqSZs6cGdYvuuiisP7ZZ5+V1kaPHh2OnTx5clh/7733wvo777wT1qM1+ddff304NsXd4x+oAkf4AZki/ECmCD+QKcIPZIrwA5ki/ECmvjGn7k61y1JbTaeWtka3n2rFNXp67BEjRoT1ffv2ldYmTpwYjl29enVYX7duXViPWnmS1N3dXVqbPXt2OPbiiy8O6zt27AjrX331VWkttVw4ahNK6S2+U63AadOmldZSLc633norrNeKZ34gU4QfyBThBzJF+IFMEX4gU4QfyBThBzL1jenzp3rtqV56o7ffiKFD429D1MeX4tNEP/XUU+HYTZs2hfUPP/wwrKd67Zdccklp7dxzzw3HpnrxqVOWn3zyyaW11PfztNNOC+svv/xyWE9tLx7d/uWXXx6Opc8PoCGEH8gU4QcyRfiBTBF+IFOEH8gU4Qcy1fY+f7QuPrXmPurNpvq2jdy2FM87dQxBo8cYzJ8/P6zfddddpbWPPvooHPvqq6+G9Wg9viQtWhTv0TpjxozS2rZt28Kxw4YNC+up4yOiNflTpkwJx6ZOvf3888+H9dTtR+v9U+emaBae+YFMEX4gU4QfyBThBzJF+IFMEX4gU4QfyFRyi24zWyHpSkm97j67uO4OSddJOrKY+zZ3/5/knWW6RfecOXPC+s033xzWL7zwwrD+yiuvlNY+/vjjcOz7778f1hcsWBDWzz///LC+efPm0trw4cPDsdF596V0Pzw6z0Fqvf2qVavC+kknnRTWp06dGtajuU+aNCkcm/qeNHOL7t9IumKQ6//T3ecU/5LBB9BZkuF393WS4lOqADjuNPKe/0Yze9XMVpjZqU2bEYC2qDf8v5L0HUlzJG2X9IuyLzSzpWbWZWZddd4XgBaoK/zu3uPufe5+WNKvJc0Lvna5u89197n1ThJA89UVfjObPODTxZJea850ALRLckmvmT0g6VJJp5tZt6SfSbrUzOZIcklbJf2khXME0ALJ8Lv7NYNcfW8L5qJRo0aF9Wj99oEDB8KxBw8eDOunnHJKWJ83r/Sdja699tpw7Nlnnx3We3p6wvoTTzwR1lPr2iOnn356WJ8+fXpY/+STT8L6iSeeWFpLHWOS+nkYMWJEWI+OYVi/fn04NvW4RMcQSOljFN5+++3S2syZM8OxZ555Zmkttc/CQBzhB2SK8AOZIvxApgg/kCnCD2SK8AOZauupu0eMGBGeynnjxo3h+DVr1pTWUm2jVKtv/PjxYX3IkCGltdSy2aeffjqsp9qUqeWjqa2qGxn7+uuvh/WzzjorrI8ZM6a0lmpRprb/fvbZZ8N6b29vaS11WvDU4xLdtpRugUb/t+hnTYrbiMeylTzP/ECmCD+QKcIPZIrwA5ki/ECmCD+QKcIPZKqtff7hw4eHy1u7uuIzfW3fvr20luoZp3qnqZ7y559/HtYjqaWnqaWrqeWhUW839f9O1Tdt2hTWU8cBnHpq+ekd9+/fH47dt29fWE8tw45On53q86e2dD906FBYHz16dFiPjt1Ifb937txZ97wG4pkfyBThBzJF+IFMEX4gU4QfyBThBzJF+IFMtX09/znnnFNaT/V99+zZU1pLrZ9Orc8eOXJkWB83blxpLTo9tZTuvab62TVso173faeOj0htNf3pp5+G9Wju0WMqSbNmzQrrqWMUou3BU9t7N3p8ROo4gb6+vtJa6twT0c8L6/kBJBF+IFOEH8gU4QcyRfiBTBF+IFOEH8hUss9vZlMl3SdpkqTDkpa7+3+b2ThJf5A0TdJWSVe7e9hsHzZsmCZNmlRanzZtWjiXqHcarfWX4p6vJO3atSusp9b7R1Jrx1M949RxBFGvPnXf0Xn1a6mnjgM477zzSmupYwzWrl0b1lPHbkTnUUgd/5B6zFPHpDTy8xIdAyAdWy8/Ussz/yFJt7j72ZL+SdINZjZL0jJJa9x9uqQ1xecAjhPJ8Lv7dnffUFzeI+lNSWdIukrSyuLLVkpa1KpJAmi+Y3rPb2bTJH1X0ouSJrr7dqn/F4SkCc2eHIDWqTn8ZjZK0kOSfuruNZ/QzsyWmlmXmXVFx+YDaK+awm9mw9Qf/N+6+6ri6h4zm1zUJ0sadOdCd1/u7nPdfW7qpIYA2icZfutf/nSvpDfd/ZcDSo9KWlJcXiLpkeZPD0CrWA3LRb8v6a+SNqm/1SdJt6n/ff8fJX1L0geSfujuuxO3Fd7ZokXx3wxvueWW0lqqbZQ6PXaqbRS1AlOn9U6dinn48OFhPdWui1paqf93Supxee6558L6gw8+WFp74YUXwrGpltf8+fPD+j333FNa27JlSzg29fP05ZdfhvW9e/eG9ehnYsqUKeHYxYsXl9a++OIL9fX1xeuVC8k+v7s/I6nsxuJHH0DH4gg/IFOEH8gU4QcyRfiBTBF+IFOEH8hUss/f1DtL9PkbkVoWO2fOnLA+b968sL5w4cLS2owZM8KxqVNUp5aHppYbHzhwoLS2evXqcOzjjz8e1lN9/CqNHTs2rEfHGERbZEvpPn7q1N+p8dGS4Q0bNoRjb7/99rDu7jX1+XnmBzJF+IFMEX4gU4QfyBThBzJF+IFMEX4gUx3V50/16lPru9F+qXMNNCK1VTUGR58fQIjwA5ki/ECmCD+QKcIPZIrwA5ki/ECmOqrPD6Bx9PkBhAg/kCnCD2SK8AOZIvxApgg/kCnCD2QqGX4zm2pma83sTTN73cxuKq6/w8w+MrONxb/yE9sD6DjJg3zMbLKkye6+wcxGS3pJ0iJJV0va6+7/UfOdcZAP0HK1HuQztIYb2i5pe3F5j5m9KemMxqYHoGrH9J7fzKZJ+q6kF4urbjSzV81shZmdWjJmqZl1mVlXQzMF0FQ1H9tvZqMk/UXSne6+yswmStopySX9m/rfGvxL4jZ42Q+0WK0v+2sKv5kNk/SYpCfd/ZeD1KdJeszdZyduh/ADLda0hT3Wvx3pvZLeHBj84g+BRyyW9NqxThJAdWr5a//3Jf1V0iZJh4urb5N0jaQ56n/Zv1XST4o/Dka3xTM/0GJNfdnfLIQfaD3W8wMIEX4gU4QfyBThBzJF+IFMEX4gU4QfyBThBzJF+IFMEX4gU4QfyBThBzJF+IFMEX4gU8kTeDbZTknvD/j89OK6TtSpc+vUeUnMrV7NnNs/1PqFbV3P/7U7N+ty97mVTSDQqXPr1HlJzK1eVc2Nl/1Apgg/kKmqw7+84vuPdOrcOnVeEnOrVyVzq/Q9P4DqVP3MD6AilYTfzK4ws7+Z2btmtqyKOZQxs61mtqnYebjSLcaKbdB6zey1AdeNM7M/m9k7xcdBt0mraG4dsXNzsLN0pY9dp+143faX/WY2RNLbkhZI6pa0XtI17v5GWydSwsy2Sprr7pX3hM3sYkl7Jd13ZDckM/t3Sbvd/efFL85T3f3WDpnbHTrGnZtbNLeynaV/rAofu2bueN0MVTzzz5P0rrtvdvevJP1e0lUVzKPjufs6SbuPuvoqSSuLyyvV/8PTdiVz6wjuvt3dNxSX90g6srN0pY9dMK9KVBH+MyR9OODzbnXWlt8u6Skze8nMllY9mUFMPLIzUvFxQsXzOVpy5+Z2Ompn6Y557OrZ8brZqgj/YLuJdFLL4Xvufr6kf5Z0Q/HyFrX5laTvqH8bt+2SflHlZIqdpR+S9FN3/7zKuQw0yLwqedyqCH+3pKkDPp8iaVsF8xiUu28rPvZKelj9b1M6Sc+RTVKLj70Vz+f/uXuPu/e5+2FJv1aFj12xs/RDkn7r7quKqyt/7AabV1WPWxXhXy9pupl928xOlPQjSY9WMI+vMbORxR9iZGYjJf1Anbf78KOSlhSXl0h6pMK5/J1O2bm5bGdpVfzYddqO15Uc5FO0Mv5L0hBJK9z9zrZPYhBm9o/qf7aX+lc8/q7KuZnZA5IuVf+qrx5JP5P0J0l/lPQtSR9I+qG7t/0PbyVzu1THuHNzi+ZWtrP0i6rwsWvmjtdNmQ9H+AF54gg/IFOEH8gU4QcyRfiBTBF+IFOEH8gU4QcyRfiBTP0fRmiaLxPxBrMAAAAASUVORK5CYII=\n",
+      "text/plain": [
+       "<matplotlib.figure.Figure at 0x20459a6f198>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "plt.imshow(img, cmap='gray')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "feature_data = fashion_mnist.values[:,1:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "target_data = fashion_mnist.values[:,0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.svm import SVC"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "svc = SVC(kernel='rbf', C=.1, gamma=100)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import train_test_split"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trainX, testX, trainY, testY = train_test_split(feature_data, target_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.pipeline import make_pipeline\n",
+    "from sklearn.preprocessing import MinMaxScaler\n",
+    "from sklearn.decomposition import PCA"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pipeline = make_pipeline(MinMaxScaler(), PCA(n_components=64), svc)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import GridSearchCV"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "params = {\n",
+    "    'svc__C':[1,.1,.001,],\n",
+    "    'svc__gamma':[10]\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "grid_model = GridSearchCV(pipeline, param_grid=params)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

	satisfaction_level	last_evaluation	number_project	average_montly_hours	time_spend_company	left	sales	salary
0	0.38	0.53	2	157	3	1	sales	low
1	0.80	0.86	5	262	6	1	sales	medium
2	0.11	0.88	7	272	4	1	sales	medium
3	0.72	0.87	5	223	5	1	sales	low
4	0.37	0.52	2	159	3	1	sales	low
	Text	Score
22098	We've had this for almost a week, and the dog ...	4
295111	I just have to put my two cents in on this tea...	5
125547	The Hanover Pumpernickle & Onion Pretzels are ...	5
350934	January 3, 2011<br /><br />I purchased these p...	4
502462	I received these and added them to the collect...	5
	avg_dist	avg_rating_by_driver	avg_rating_of_driver	avg_surge	city	last_trip_date	phone	signup_date	surge_pct	trips_in_first_30_days	luxury_car_user	weekday_pct
0	3.67	5.0	4.7	1.10	King's Landing	2014-06-17	iPhone	2014-01-25	15.4	4	True	46.2
1	8.26	5.0	5.0	1.00	Astapor	2014-05-05	Android	2014-01-29	0.0	0	False	50.0
2	0.77	5.0	4.3	1.00	Astapor	2014-01-07	iPhone	2014-01-06	0.0	3	False	100.0
3	2.36	4.9	4.6	1.14	King's Landing	2014-06-29	iPhone	2014-01-10	20.0	9	True	80.0
4	3.13	4.9	4.4	1.19	Winterfell	2014-03-15	Android	2014-01-27	11.8	14	False	82.4