commit-live-students · preetiail · Jan 18, 2019 · Jan 19, 2019
diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc b/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_outlier_removal/__pycache__/build.cpython-36.pyc b/q01_outlier_removal/__pycache__/build.cpython-36.pyc
diff --git a/q01_outlier_removal/build.py b/q01_outlier_removal/build.py
@@ -1,8 +1,21 @@
+# %load q01_outlier_removal/build.py
 # Default imports
 import pandas as pd
+import numpy as np
 
+# Module-level load: reads the raw CSV (path relative to the working directory)
+# every time this file is run or imported.
 loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv')
+# The second positional argument of drop() is the axis; 1 removes the
+# 'Loan_ID' column.
+# NOTE(review): recent pandas releases only accept axis as a keyword
+# (axis=1) - confirm against the pinned pandas version.
 loan_data = loan_data.drop('Loan_ID', 1)
 
+# Bare expression with no effect when run as a script or module.
+loan_data
 
+def outlier_removal(loan_data):
+    """Return a copy of ``loan_data`` without high-end outlier rows.
+
+    A row is dropped when its value in any of ``ApplicantIncome``,
+    ``CoapplicantIncome`` or ``LoanAmount`` is strictly greater than that
+    column's own 95th percentile.  Rows with a missing value in one of these
+    columns are kept, and all other columns are returned untouched.
+
+    Args:
+        loan_data: DataFrame containing at least the three numeric columns
+            named above.
+
+    Returns:
+        A new DataFrame with the outlier rows removed; the input frame is
+        not modified.
+    """
+    numeric_cols = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount']
+    # One threshold per column (a Series indexed by column name).  quantile()
+    # skips NaN, so no dropna() is needed beforehand.
+    thresholds = loan_data[numeric_cols].quantile(0.95)
+    # Compare every column with its own threshold, then collapse to a single
+    # boolean per row.  A row mask removes whole rows; indexing with a
+    # DataFrame-shaped mask would only blank individual cells to NaN.
+    is_outlier = loan_data[numeric_cols].gt(thresholds, axis=1).any(axis=1)
+    return loan_data.loc[~is_outlier].copy()
+# Module-level call; the returned frame is not stored.
+outlier_removal(loan_data)
 # Write your Solution here:
+
+
diff --git a/q01_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc b/q01_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_outlier_removal/tests/__pycache__/test_q01_outlier_removal.cpython-36.pyc b/q01_outlier_removal/tests/__pycache__/test_q01_outlier_removal.cpython-36.pyc
diff --git a/q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_data_cleaning_all/__pycache__/build.cpython-36.pyc b/q02_data_cleaning_all/__pycache__/build.cpython-36.pyc
diff --git a/q02_data_cleaning_all/build.py b/q02_data_cleaning_all/build.py
@@ -1,3 +1,4 @@
+# %load q02_data_cleaning_all/build.py
 # Default Imports
 import sys, os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname('__file__'))))
@@ -6,9 +7,29 @@
 from sklearn.model_selection import train_test_split
 from greyatomlib.logistic_regression_project.q01_outlier_removal.build import outlier_removal
 
+
+
 loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv')
-loan_data = loan_data.drop('Loan_ID', 1)
+# NOTE(review): the Loan_ID drop is disabled here, so within this hunk the
+# Loan_ID column appears to still be present when data_cleaning runs -
+# confirm this is intended.
+# loan_data = loan_data.drop('Loan_ID', 1)
 loan_data = outlier_removal(loan_data)
 
+def data_cleaning(loan_data):
+    """Impute missing feature values and split the data into train/test sets.
+
+    Args:
+        loan_data: DataFrame holding the feature columns plus the
+            ``Loan_Status`` target column.
+
+    Returns:
+        Tuple ``(X, y, X_train, X_test, y_train, y_test)``: ``X`` is the
+        imputed feature frame, ``y`` the ``Loan_Status`` column, and the
+        last four come from ``train_test_split`` with ``test_size=0.25`` and
+        ``random_state=9``.
+
+    Raises:
+        KeyError: if ``Loan_Status`` or one of the imputed columns is missing.
+    """
+    # Select the features by excluding the target by name, so the target can
+    # never leak into X if the column order changes.  drop() returns a new
+    # frame, so the imputation below never writes into the caller's data.
+    X = loan_data.drop('Loan_Status', axis=1)
+    y = loan_data['Loan_Status']
+    # Replacement value per column.  Numeric columns get numeric fills so
+    # they keep a numeric dtype instead of becoming mixed str/float columns.
+    fill_values = {
+        'LoanAmount': X['LoanAmount'].mean(),
+        'Gender': 'Female',
+        'Married': 'Yes',
+        'Dependents': '3+',
+        'Self_Employed': 'No',
+        'Loan_Amount_Term': 360,
+        'Credit_History': 1,
+    }
+    for column, value in fill_values.items():
+        X[column] = X[column].fillna(value)
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.25, random_state=9)
+    return X, y, X_train, X_test, y_train, y_test
+
+
+# Module-level call; the returned tuple is not stored.
+data_cleaning(loan_data)
+
 
-# Write your solution here :
diff --git a/q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc b/q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc
diff --git a/q03_logistic_regression/__pycache__/__init__.cpython-36.pyc b/q03_logistic_regression/__pycache__/__init__.cpython-36.pyc
diff --git a/q03_logistic_regression/__pycache__/build.cpython-36.pyc b/q03_logistic_regression/__pycache__/build.cpython-36.pyc
diff --git a/q03_logistic_regression/build.py b/q03_logistic_regression/build.py
@@ -1,3 +1,4 @@
+# %load q03_logistic_regression/build.py
 # Default Imports
 import pandas as pd
 from sklearn.preprocessing import StandardScaler
@@ -12,7 +13,20 @@
+# Module-level pipeline: each step feeds the next, ending with the four
+# splits passed to logistic_regression below.
+# NOTE(review): outlier_removal, data_cleaning and data_cleaning_2 are
+# defined outside this hunk - their behaviour is not verified here.
 loan_data = outlier_removal(loan_data)
 X, y, X_train, X_test, y_train, y_test = data_cleaning(loan_data)
 X_train, X_test, y_train, y_test = data_cleaning_2(X_train, X_test, y_train, y_test)
+def logistic_regression(X_train, X_test, y_train, y_test):
+    """Fit a logistic-regression model on standardised features and score it.
+
+    Args:
+        X_train: training features.
+        X_test: test features.
+        y_train: training labels.
+        y_test: test labels.
+
+    Returns:
+        The confusion matrix of ``y_test`` against the model's predictions
+        for ``X_test``.
+    """
+    # Learn the scaling statistics from the training split only and reuse
+    # them for the test split, so no test-set information reaches the model.
+    # The transformed arrays must be kept: transform() does not modify its
+    # input in place.
+    scaler = StandardScaler()
+    X_train_scaled = scaler.fit_transform(X_train)
+    X_test_scaled = scaler.transform(X_test)
+    model = LogisticRegression(random_state=9)
+    model.fit(X_train_scaled, y_train)
+    y_pred = model.predict(X_test_scaled)
+    return confusion_matrix(y_test, y_pred)
 
-
-# Write your solution code here:
+# Module-level call; the returned confusion matrix is not stored.
+logistic_regression(X_train, X_test, y_train, y_test)
 
diff --git a/q03_logistic_regression/tests/__pycache__/__init__.cpython-36.pyc b/q03_logistic_regression/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q03_logistic_regression/tests/__pycache__/test_q03_logistic_regression.cpython-36.pyc b/q03_logistic_regression/tests/__pycache__/test_q03_logistic_regression.cpython-36.pyc