-
Notifications
You must be signed in to change notification settings - Fork 0
/
steel_plates_h2o.py
47 lines (36 loc) · 1.19 KB
/
steel_plates_h2o.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import matplotlib.pyplot as plt
import os
import itertools
import numpy as np
import random
from sklearn.model_selection import train_test_split
import pandas as pd
import h2o
from h2o.estimators.random_forest import H2ORandomForestEstimator
h2o.init()
df = pd.read_csv(os.getcwd()+'/steel_data.csv')
#df=h2o.import_file(os.getcwd()+'/steel_data.csv')
print type(df)
df.loc[df['V28']==1,'Output']=1
df.loc[df['V29']==1,'Output']=2
df.loc[df['V30']==1,'Output']=3
df.loc[df['V31']==1,'Output']=4
df.loc[df['V32']==1,'Output']=5
df.loc[df['V33']==1,'Output']=6
df.loc[df['Class']==2,'Output']=7
df=h2o.H2OFrame(df)
#print df
f=['V1','V2','V3','V4','V5','V6','V7']
train,test = df.split_frame([0.51])
#train,test= df.split_frame([0.51])
X_train=train.col_names[:-8]
print X_train
Y_train=train.col_names[-1]
def RandomForest(estimators=10):
model= H2ORandomForestEstimator(model_id="rf_steel_plates"+str(random.randint(1,10000)),ntrees=200,stopping_rounds=2,score_each_iteration=True,seed=1000000)
model.train(X_train,Y_train, training_frame=df)
path=h2o.save_model(model,path=os.getcwd())
result=model.predict(test[:-8])
print(model.model_performance(test))
RandomForest()
#h2o.cluster().shutdown()