Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Solved all the questions with one logic change #4

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Empty file added assignment1/assignment1.ipynb
Empty file.
100 changes: 100 additions & 0 deletions assignment1/assignment1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline

np.seterr(divide='ignore', invalid='ignore')

# Column names for the Boston housing dataset: 13 features + MEDV (median
# home value, the regression target).
name = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
dataset_file = "housing.csv"
# sep=r'\s+' replaces delim_whitespace=True, which is deprecated (pandas 2.1)
# and removed in later releases; behavior is identical for this file.
df = pd.read_csv(dataset_file, sep=r'\s+', names=name)
# In a plain script (unlike a notebook) bare expressions are discarded,
# so print the previews explicitly.
print(df.head())

row, col = df.shape
print("DF has ", row, "rows and ", col, " columns")
df.info()  # info() prints directly; no wrapping needed

print(df.corr())


print(df.describe())
# Single-feature regression: LSTAT (% lower-status population) vs MEDV.
# reshape to (n_samples, 1) as sklearn expects 2-D feature arrays.
x = df['LSTAT'].to_numpy().reshape(row, 1)
y = df['MEDV'].to_numpy().reshape(row, 1)

print(x.shape)
print(y.shape)

# Standardise LSTAT to zero mean / unit variance before fitting.
scaler = StandardScaler()
x = scaler.fit_transform(x)

# 80/20 split with a fixed seed so the reported metrics are reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=22)
model = LinearRegression()
model.fit(x_train, y_train)

# Evaluate on the held-out test set.
prediction = model.predict(x_test)
Rsquared = r2_score(y_test, prediction)
RMSE = np.sqrt(mean_squared_error(y_test, prediction))
print("RMSE:", RMSE)
print("R^2: ", Rsquared)
print(x_train.shape)
print(y_train.shape)
import matplotlib.pyplot as plt  # NOTE(review): better placed with the top-of-file imports
plt.scatter(x_train, y_train)
# Sort by x before drawing the fitted line so it is traced left-to-right
# rather than jumping back and forth between unsorted training points.
order = np.argsort(x_train[:, 0])
plt.plot(x_train[order], model.predict(x_train)[order])


# Quadratic fit: expand LSTAT to [1, x, x^2], then reuse linear regression
# on the expanded features via a pipeline.
degree = 2
polynomial = PolynomialFeatures(degree=degree)
model_2 = make_pipeline(polynomial, model)
model_2.fit(x_train, y_train)
prediction_2 = model_2.predict(x_test)
Rsquared = r2_score(y_test, prediction_2)
# BUG FIX: the polynomial RMSE was stored in a misspelled `RMSER` while the
# stale linear-model `RMSE` was printed, so the wrong metric was reported.
RMSE = np.sqrt(mean_squared_error(y_test, prediction_2))
print("RMSE:", RMSE)
print("R^2: ", Rsquared)
plt.figure()
plt.scatter(x_train, y_train, s=15)
# Sort by x so the fitted curve is drawn as a smooth line, not a zig-zag.
order = np.argsort(x_train[:, 0])
plt.plot(x_train[order], model_2.predict(x_train)[order], color="r")
plt.title("Polynomial regression with degree " + str(degree))
plt.show()

# High-degree fit (degree 20) on the same single feature, to contrast with
# the degree-2 model above (prone to overfitting).
degree = 20
polynomial = PolynomialFeatures(degree=degree)
model_2 = make_pipeline(polynomial, model)
model_2.fit(x_train, y_train)
prediction_2 = model_2.predict(x_test)
Rsquared = r2_score(y_test, prediction_2)
RMSE = np.sqrt(mean_squared_error(y_test, prediction_2))
print("RMSE:", RMSE)
print("R^2: ", Rsquared)
plt.figure()
plt.scatter(x_train, y_train, s=15)
# Sort by x so the fitted curve is drawn as a smooth line, not a zig-zag.
order = np.argsort(x_train[:, 0])
plt.plot(x_train[order], model_2.predict(x_train)[order], color="r")
# BUG FIX: title previously read '...degree 20 ' + str(2), rendering
# "degree 20 2"; use the actual degree variable instead.
plt.title("Polynomial regression with degree " + str(degree))
plt.show()

# Multiple-feature regression: the three features most correlated with MEDV.
x2 = df[['LSTAT', 'RM', "PTRATIO"]].to_numpy()
y2 = df[['MEDV']].to_numpy()


# Standardise all three features together.
scaler = StandardScaler()
x2 = scaler.fit_transform(x2)

# BUG FIX: the split previously passed the earlier single-feature `y` instead
# of the freshly built `y2`. The values happen to match today, but this
# section should be self-contained so it survives changes to the code above.
x_train, x_test, y_train, y_test = train_test_split(x2, y2, test_size=0.2, random_state=22)
model = LinearRegression()
model.fit(x_train, y_train)

# Report test-set metrics for the multi-feature model.
prediction = model.predict(x_test)
Rsquared = r2_score(y_test, prediction)
RMSE = np.sqrt(mean_squared_error(y_test, prediction))
print("RMSE:", RMSE)
print("R^2: ", Rsquared)
print(x_train.shape)
print(y_train.shape)

35 changes: 35 additions & 0 deletions assignment1/assignment1_output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
Table has 506 rows and 14 columns
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 14 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 CRIM 506 non-null float64
1 ZN 506 non-null float64
2 INDUS 506 non-null float64
3 CHAS 506 non-null int64
4 NOX 506 non-null float64
5 RM 506 non-null float64
6 AGE 506 non-null float64
7 DIS 506 non-null float64
8 RAD 506 non-null int64
9 TAX 506 non-null float64
10 PTRATIO 506 non-null float64
11 B 506 non-null float64
12 LSTAT 506 non-null float64
13 MEDV 506 non-null float64
dtypes: float64(12), int64(2)
memory usage: 55.5 KB
Linear Regression
RMSE: 6.558372421381304
R^2: 0.5149044976402835
Polynomial Regression
RMSE: 6.558372421381304
R^2: 0.5857844942953512
polynomial with 20 degree
RMSE: 5.701888182388953
R^2: 0.6333324378319982
Multiple Features 'LSTAT','RM','PTRATIO'
RMSE: 4.974434993788514
adjusted R Squared 0.7123807719300754
R^2: 0.720923917318291
224 changes: 224 additions & 0 deletions assignment2/assignment2.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "255 Assignment 2.ipynb",
"private_outputs": true,
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"metadata": {
"id": "i8U-1XufkmSg"
},
"source": [
"from sklearn.datasets import fetch_lfw_people\n",
"from sklearn.svm import SVC\n",
"from sklearn.decomposition import PCA as RandomizedPCA\n",
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.svm import SVC\n",
"import seaborn as sns\n",
"from sklearn.pipeline import make_pipeline\n",
"from sklearn import model_selection\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from sklearn import metrics"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "CD0jWoE9kxM0"
},
"source": [
"faces = fetch_lfw_people(min_faces_per_person=60)\n",
"print('data loaded')\n"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "XOkqgViXk6CF"
},
"source": [
"target_names = faces.target_names\n",
"n, h, w = faces.images.shape"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "i1VL_vrCm7yy"
},
"source": [
"photos = faces.data\n",
"labels = faces.target\n",
"xtrain, xtest, ytrain, ytest = model_selection.train_test_split(photos, labels, test_size=0.25,random_state=42)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "XadLyIGel0ZY"
},
"source": [
"pca = RandomizedPCA(n_components=150, svd_solver='randomized', whiten=True, random_state=42).fit(xtrain)\n",
"svc = SVC(kernel='rbf', class_weight='balanced')\n",
"model = make_pipeline(pca, svc)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "R_ns5nZnncyH"
},
"source": [
"C = np.logspace(-1, 5, 4)\n",
"gammas = np.logspace(-3, 0, 4)\n",
"params = dict(svc__gamma=gammas, svc__C=C)\n",
"gsv = model_selection.GridSearchCV(model, params)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "jVFZ328OoZZp"
},
"source": [
"gsv = gsv.fit(xtrain, ytrain)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "_Tzq1kwYplRv"
},
"source": [
"print(gsv.best_params_)\n",
"model = gsv.best_estimator_\n",
"ypred = model.predict(xtest)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "g7U8iDsWrawZ"
},
"source": [
"print(ypred)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "dYgFtFldr0LT"
},
"source": [
"r_ind = np.random.choice(xtest.shape[0], size=24, replace=False)\n",
"photos = xtest[r_ind]\n",
"labels_actual = ytest[r_ind]\n",
"labels_pred_sample = ypred[r_ind]\n",
"names_pred = target_names[labels_pred_sample]\n",
"names_actual = target_names[labels_actual]\n",
"print(metrics.classification_report(ytest, ypred, target_names=target_names))"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "rDslZ7t4sWgR"
},
"source": [
"# CONFUSION MATRIX\n",
"confusion_matrix = metrics.confusion_matrix(names_pred,names_actual,labels=target_names)\n",
"print(confusion_matrix)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Ec1zuYf9skgF"
},
"source": [
"#HEAT MAP\n",
"sns.heatmap(confusion_matrix, annot=True)\n",
"plt.xlabel('Actual')\n",
"plt.ylabel('Predicted')\n",
"plt.show(block=True)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "orYgVPjpvfhn"
},
"source": [
"n_row=4\n",
"n_col=6\n",
"fig_title=\"Predictions\"\n",
"fig = plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))\n",
"plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)\n",
"for i in range(n_row * n_col):\n",
" ax = fig.add_subplot(n_row, n_col, i + 1)\n",
" ax.imshow(photos[i].reshape((h, w)), cmap=plt.cm.gray)\n",
" fc = 'black'\n",
" if names_pred[i]!=names_actual[i] :\n",
" fc = 'red'\n",
" title = \"Predicted: \"+names_pred[i]+\"\\nActual: \"+names_actual[i]\n",
" ax.set_title(names_pred[i], size=12,color=fc)\n",
" plt.xticks(())\n",
" plt.yticks(())\n",
"if fig_title: \n",
" fig.suptitle(fig_title+'\\n', fontsize=20)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "3hEgMHkewU87"
},
"source": [
""
],
"execution_count": null,
"outputs": []
}
]
}
Loading