-
Notifications
You must be signed in to change notification settings - Fork 0
/
code.py
66 lines (48 loc) · 1.42 KB
/
code.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import pandas as pd
from numpy import *
import numpy as np
from sklearn import preprocessing
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn import metrics
from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split
from sklearn import neighbors
# --- Training data ---------------------------------------------------------
# Load the training CSV and split it into a feature matrix (``mainarray``)
# and a label vector (``train_y``). Columns are addressed by position only:
# column 0 is compared against "Male" (presumably a gender field), columns
# 0-6 together form the features and column 7 is the target label.
data = pd.read_csv('train dataset.csv')
# Work on a copy so column 0 can be overwritten even when ``.values`` hands
# back a read-only view of the DataFrame.
array = data.values.copy()

# Encode column 0 as 1 for "Male" and 0 for anything else. The whole column
# is built first and assigned in one step instead of a per-row index loop.
array[:, 0] = [1 if gender == "Male" else 0 for gender in array[:, 0]]

# Re-wrap the array so the columns get positional integer labels (0..7).
df = pd.DataFrame(array)
maindf = df[[0, 1, 2, 3, 4, 5, 6]]
mainarray = maindf.values
print(mainarray)

# Labels are handed to the classifier as strings. A fresh array is built
# rather than writing into ``temp.values``: that array can be a read-only
# view of the Series (pandas Copy-on-Write), where item assignment raises.
temp = df[7]
train_y = np.array([str(label) for label in temp], dtype=object)
# Multinomial logistic regression with the newton-cg solver, capped at 1000
# iterations. ``fit`` returns the estimator itself, so ``mul_lr`` ends up
# bound to the fitted model.
mul_lr = linear_model.LogisticRegression(
    multi_class='multinomial',
    solver='newton-cg',
    max_iter=1000,
).fit(mainarray, train_y)
# --- Prediction on the test set ---------------------------------------------
# The test CSV is read by position as well: column 0 is compared against
# "Male" and columns 0-6 are passed to the fitted model as features.
testdata = pd.read_csv('test dataset.csv')
# Work on a copy so column 0 can be overwritten even when ``.values`` hands
# back a read-only view of the DataFrame.
test = testdata.values.copy()

# Encode column 0 as 1 for "Male" and 0 for anything else, in one assignment
# instead of a per-row index loop.
test[:, 0] = [1 if gender == "Male" else 0 for gender in test[:, 0]]

df1 = pd.DataFrame(test)
testdf = df1[[0, 1, 2, 3, 4, 5, 6]]
maintestarray = testdf.values
print(maintestarray)

# Predict one label per test row and make sure every label is a ``str``.
y_pred = np.array(
    [str(label) for label in mul_lr.predict(maintestarray)],
    dtype=object,
)

# Write the predictions to CSV with a 1-based "Person No" index column.
DF = pd.DataFrame(y_pred, columns=['Predicted Personality'])
DF.index = DF.index + 1
DF.index.name = 'Person No'
DF.to_csv("output.csv")