Logistic_Regression


# Logistic Regression - Applicants that got admission vs applicants that didn't get admission

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def net_input(theta, x):
    # Computes the weighted sum of inputs
    return np.dot(x, theta)

def probability(theta, x):
    # Returns the probability after passing through sigmoid
    return sigmoid(net_input(theta, x))

def cost_function(self, theta, x, y):
    m = x.shape[0]
    total_cost = -(1 / m) * np.sum(y * np.log(probability(theta, x)) + (1 - y) * np.log(1 - probability(theta, x)))
    return total_cost

def gradient(self, theta, x, y):

    m = x.shape[0]
    return (1 / m) * np.dot(x.T, sigmoid(net_input(theta, x)) - y)

def fit(self, x, y, theta):
    opt_weights = fmin_tnc(func=cost_function, x0=theta, fprime=gradient,args=(x, y.flatten()))
    return opt_weights[0]
    

def load_data(path, header):
  
  marks_df = pd.read_csv(path, header=header)
  return marks_df


# load the data from the file
data = load_data("data/marks.txt", None)

# X = feature values
X = data.iloc[:, :-1]

# y = target values
y = data.iloc[:, -1]

# filter out the applicants that got admitted
admitted = data.loc[y == 1]

# filter out the applicants that din't get admission
not_admitted = data.loc[y == 0]

# plots
plt.scatter(admitted.iloc[:, 0], admitted.iloc[:, 1], s=10, label='Admitted')
plt.scatter(not_admitted.iloc[:, 0], not_admitted.iloc[:, 1], s=10, label='Not Admitted')
plt.legend()
plt.show()


#### Create the data ########

X = np.c_[np.ones((X.shape[0], 1)), X]
y = y[:, np.newaxis]
theta = np.zeros((X.shape[1], 1))

## Find the parameters ## 
parameters = fit(X, y, theta)
## [-25.16131856 0.20623159 0.20147149] ##


## Plotting the decision boundary ##

x_values = [np.min(X[:, 1] - 5), np.max(X[:, 2] + 5)]
y_values = - (parameters[0] + np.dot(parameters[1], x_values)) / parameters[2]

plt.plot(x_values, y_values, label='Decision Boundary')
plt.xlabel('Marks in 1st Exam')
plt.ylabel('Marks in 2nd Exam')
plt.legend()
plt.show()


## Find the accuracy of model ##

def predict(self, x):
    theta = parameters[:, np.newaxis]
    return probability(theta, x)
def accuracy(self, x, actual_classes, probab_threshold=0.5):
    predicted_classes = (predict(x) >= 
                         probab_threshold).astype(int)
    predicted_classes = predicted_classes.flatten()
    accuracy = np.mean(predicted_classes == actual_classes)
    return accuracy * 100
    
accuracy(X, y.flatten())
## 89% ##