commit-live-students · psicktrick · Dec 1, 2018 · Dec 19, 2018 · Dec 19, 2018 · Dec 19, 2018
diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc b/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_outlier_removal/__pycache__/build.cpython-36.pyc b/q01_outlier_removal/__pycache__/build.cpython-36.pyc
diff --git a/q01_outlier_removal/build.py b/q01_outlier_removal/build.py
@@ -1,3 +1,4 @@
+# %load q01_outlier_removal/build.py
 # Default imports
 import pandas as pd
 
@@ -6,3 +7,15 @@
 
 
 # Write your Solution here:
+def outlier_removal(df):
+    """Drop rows whose income or loan amount exceeds the 95th percentile.
+
+    The cut-off for each of ``ApplicantIncome``, ``CoapplicantIncome`` and
+    ``LoanAmount`` is the 0.95 quantile of that column in ``df``. A row is
+    removed when any one of the three values is strictly greater than its
+    cut-off. Missing values never compare greater, so such rows are kept.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Frame containing the three numeric columns named above.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A new frame without the outlier rows; ``df`` itself is not modified.
+    """
+    num_feats = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount']
+    # Only the three filtered columns need a threshold, and taking the
+    # quantile on them directly does not depend on their exact integer/float
+    # width.
+    q_vals = df[num_feats].quantile(0.95)
+    # Filter with a row-wise boolean mask rather than dropping by index label,
+    # so a repeated index label cannot take non-outlier rows down with it.
+    is_outlier = df[num_feats].gt(q_vals, axis='columns').any(axis=1)
+    return df[~is_outlier]
+
+
+
diff --git a/q01_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc b/q01_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_outlier_removal/tests/__pycache__/test_q01_outlier_removal.cpython-36.pyc b/q01_outlier_removal/tests/__pycache__/test_q01_outlier_removal.cpython-36.pyc
diff --git a/q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_data_cleaning_all/__pycache__/build.cpython-36.pyc b/q02_data_cleaning_all/__pycache__/build.cpython-36.pyc
diff --git a/q02_data_cleaning_all/build.py b/q02_data_cleaning_all/build.py
@@ -1,3 +1,4 @@
+# %load q02_data_cleaning_all/build.py
 # Default Imports
 import sys, os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname('__file__'))))
@@ -12,3 +13,16 @@
 
 
 # Write your solution here :
+def data_cleaning(df):
+    """Impute missing feature values and split the data into train/test sets.
+
+    Every column except the last is treated as a feature and the last column
+    as the target. Missing ``LoanAmount`` values are replaced by the column
+    mean; missing values in the other listed columns are replaced by that
+    column's most frequent value. 25% of the rows are held out for testing,
+    with ``random_state=9`` so the split is reproducible.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Loan data with the target in the last column. It is not modified.
+
+    Returns
+    -------
+    tuple
+        ``(X, y, X_train, X_test, y_train, y_test)`` where ``X`` is the
+        imputed feature frame and ``y`` the target column.
+    """
+    # Work on a copy: assigning into a bare ``iloc`` slice is chained
+    # assignment and may warn or write through to the caller's frame.
+    X = df.iloc[:, :-1].copy()
+    y = df.iloc[:, -1]
+    X['LoanAmount'] = X['LoanAmount'].fillna(X['LoanAmount'].mean())
+    cols = ['Gender','Married', 'Dependents', 'Self_Employed', 'Loan_Amount_Term', 'Credit_History']
+    for col in cols:
+        most_frequent = X[col].mode()
+        # ``mode()`` is empty when the whole column is missing; there is
+        # nothing sensible to fill with in that case, so leave it as is.
+        if not most_frequent.empty:
+            X[col] = X[col].fillna(most_frequent.iloc[0])
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=9)
+    return X, y, X_train, X_test, y_train, y_test
+
+
diff --git a/q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc b/q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc
diff --git a/q02_data_cleaning_all_2/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all_2/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc b/q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc
diff --git a/q02_data_cleaning_all_2/build.py b/q02_data_cleaning_all_2/build.py
@@ -1,3 +1,4 @@
+# %load q02_data_cleaning_all_2/build.py
 # Default Imports
 import pandas as pd
 import numpy as np
@@ -11,3 +12,15 @@
 
 
 # Write your solution here :
+def data_cleaning_2(X_train, X_test, y_train, y_test):
+    """Square-root transform numeric features and one-hot encode categoricals.
+
+    Every numeric column of ``X_train`` is replaced by its square root in
+    both feature frames. The categorical columns listed in ``cols`` are then
+    one-hot encoded with the first level of each dropped.
+
+    Parameters
+    ----------
+    X_train, X_test : pandas.DataFrame
+        Feature frames sharing the same columns. They are not modified.
+    y_train, y_test
+        Targets; passed through untouched.
+
+    Returns
+    -------
+    tuple
+        ``(X_train, X_test, y_train, y_test)`` with transformed copies of the
+        feature frames. Both frames carry the same columns in the same order.
+    """
+    # Copy first so the caller's frames (often slices of a larger frame) are
+    # neither mutated nor the source of chained-assignment warnings.
+    X_train = X_train.copy()
+    X_test = X_test.copy()
+
+    # ``np.number`` selects every numeric width, whereas 'int' means the
+    # platform integer and can miss int64 columns on some systems.
+    num_cols = X_train.select_dtypes(include=[np.number]).columns
+    for col in num_cols:
+        # Plain column assignment replaces the column, so integer columns
+        # become float instead of being written into in place.
+        X_train[col] = np.sqrt(X_train[col])
+        X_test[col] = np.sqrt(X_test[col])
+
+    cols = ['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'Property_Area']
+    # Encode both frames together: encoding them separately yields different
+    # dummy columns whenever a category level is absent from one of them.
+    n_train = len(X_train)
+    combined = pd.get_dummies(pd.concat([X_train, X_test]), columns=cols, drop_first=True)
+    X_train = combined.iloc[:n_train].copy()
+    X_test = combined.iloc[n_train:].copy()
+    return X_train, X_test, y_train, y_test
+
+
diff --git a/q02_data_cleaning_all_2/tests/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all_2/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_data_cleaning_all_2/tests/__pycache__/q02_test_data_cleaning_2.cpython-36.pyc b/q02_data_cleaning_all_2/tests/__pycache__/q02_test_data_cleaning_2.cpython-36.pyc
diff --git a/q03_logistic_regression/__pycache__/__init__.cpython-36.pyc b/q03_logistic_regression/__pycache__/__init__.cpython-36.pyc
diff --git a/q03_logistic_regression/__pycache__/build.cpython-36.pyc b/q03_logistic_regression/__pycache__/build.cpython-36.pyc
diff --git a/q03_logistic_regression/build.py b/q03_logistic_regression/build.py
@@ -1,3 +1,4 @@
+# %load q03_logistic_regression/build.py
 # Default Imports
 import pandas as pd
 from sklearn.preprocessing import StandardScaler
@@ -15,4 +16,15 @@
 
 
 # Write your solution code here:
+def logistic_regression(X_train, X_test, y_train, y_test):
+    """Fit a logistic regression and return its test-set confusion matrix.
+
+    ``ApplicantIncome``, ``CoapplicantIncome`` and ``LoanAmount`` are
+    standardised before fitting. The scaler is fitted on the training frame
+    only and then applied to the test frame.
+
+    Parameters
+    ----------
+    X_train, X_test : pandas.DataFrame
+        Feature frames containing the three columns above. They are not
+        modified.
+    y_train, y_test
+        Targets for the corresponding feature frames.
+
+    Returns
+    -------
+    The value of ``confusion_matrix(y_test, y_pred)`` for the predictions on
+    the scaled test frame.
+    """
+    cols = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount']
+    # Scale copies so the caller's frames keep their original values.
+    X_train = X_train.copy()
+    X_test = X_test.copy()
+
+    scale = StandardScaler()
+    X_train[cols] = scale.fit_transform(X_train[cols])
+    # Reuse the training mean/variance: re-fitting on the test set would put
+    # the two frames on different scales and leak test statistics.
+    X_test[cols] = scale.transform(X_test[cols])
+
+    log_reg = LogisticRegression(random_state=9)
+    log_reg.fit(X_train, y_train)
+    y_pred = log_reg.predict(X_test)
+    return confusion_matrix(y_test, y_pred)
+
 
diff --git a/q03_logistic_regression/tests/__pycache__/__init__.cpython-36.pyc b/q03_logistic_regression/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q03_logistic_regression/tests/__pycache__/test_q03_logistic_regression.cpython-36.pyc b/q03_logistic_regression/tests/__pycache__/test_q03_logistic_regression.cpython-36.pyc